From 5ef75f4d1e1d47da40be4df3d12c64755a8f9dc6 Mon Sep 17 00:00:00 2001 From: Alberto-Villarreal <68255727+Alberto-Villarreal@users.noreply.github.com> Date: Tue, 18 Aug 2020 12:36:12 -0700 Subject: [PATCH 01/17] Removed unused header file "chrono" from iso2dfd and particle-diffusion (#86) * Initial Commit to Open Source Repo Signed-off-by: avillarr * Adding additional inlclude directories to VS project file Signed-off-by: avillarr * Adding statement about dpc_common file and where to find them in README file. Change copyright to 2020 Signed-off-by: avillarr * Adding statement about dpc_common file and where to find them in README file. Change copyright to 2020 Signed-off-by: avillarr * Adding statement about dpc_common file and where to find them in README file. Change copyright to 2020 Signed-off-by: avillarr * Adding statement about dpc_common file and where to find them in README file. Change copyright to 2020 Signed-off-by: avillarr * Adding statement about dpc_common file and where to find them in README file. 
Change copyright to 2020 Signed-off-by: avillarr * Adding particle-diffusion directory Signed-off-by: avillarr * Adding particle-diffusion directory Signed-off-by: avillarr * Fix copyright -> 2020 Signed-off-by: avillarr * Fixing MKL missing header file Signed-off-by: avillarr * Fixing MKL missing header file - try 2 Signed-off-by: avillarr * Add the common includes directory to VS project file Signed-off-by: avillarr * Fix copyright and time units Signed-off-by: Alberto Villarreal * Removing unused header file chrono Signed-off-by: Alberto Villarreal * Adding path for dpc_common header file for Linux Signed-off-by: Alberto Villarreal * Adding path for dpc_common header file for Linux and removing warning Signed-off-by: Alberto Villarreal Co-authored-by: Anjali Gola Co-authored-by: Alberto Villarreal --- .../StructuredGrids/iso2dfd_dpcpp/README.md | 17 ++++------------- .../iso2dfd_dpcpp/src/iso2dfd.cpp | 1 - .../particle-diffusion/README.md | 12 ------------ .../particle-diffusion/src/motionsim.cpp | 1 - 4 files changed, 4 insertions(+), 27 deletions(-) diff --git a/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/README.md b/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/README.md index 604dd14b56..43ca9e2d4e 100644 --- a/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/README.md +++ b/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/README.md @@ -53,9 +53,12 @@ global ID variable) for a single time step. This code sample is licensed under MIT license. - ## Building the `iso2dfd` Program for CPU and GPU +### Include Files + +The include folder is located at %ONEAPI_ROOT%\dev-utilities\latest\include on your development system. + ### Running Samples In DevCloud If running a sample in the Intel DevCloud, remember that you must specify the compute node (CPU, GPU, @@ -92,18 +95,6 @@ Perform the following steps: Right click on the project in Solution explorer and select Rebuild. From top menu select Debug -> Start without Debugging. 
->If you see the following error message when compiling this sample: -> -``` -Error 'dpc_common.hpp' file not found -``` ->You need to add the following directory to the list of include folders, that are required by your project, in your project's Visual Studio project property panel. The missing include folder is located at `%ONEAPI_ROOT%\dev-utilities\latest\include` on your development system. - -* Build the program using MSBuild - Open "x64 Native Tools Command Prompt for VS2017" or "x64 Native Tools Command Prompt for VS2019" - Run - MSBuild iso2dfd.sln /t:Rebuild /p:Configuration="Release" - - ## Running the Sample ### Application Parameters diff --git a/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/src/iso2dfd.cpp b/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/src/iso2dfd.cpp index 710d87051b..62bd936ccf 100644 --- a/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/src/iso2dfd.cpp +++ b/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/src/iso2dfd.cpp @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include diff --git a/DirectProgramming/DPC++/StructuredGrids/particle-diffusion/README.md b/DirectProgramming/DPC++/StructuredGrids/particle-diffusion/README.md index 50c61fa567..fc1e4910a4 100644 --- a/DirectProgramming/DPC++/StructuredGrids/particle-diffusion/README.md +++ b/DirectProgramming/DPC++/StructuredGrids/particle-diffusion/README.md @@ -104,18 +104,6 @@ Perform the following steps: Right click on the project in Solution explorer and select Rebuild From top menu select Debug -> Start without Debugging ->If you see the following error message when compiling this sample: -> -``` -Error 'dpc_common.hpp' file not found -``` ->You need to add the following directory to the list of include folders, that are required by your project, in your project's Visual Studio project property panel. The missing include folder is located at `%ONEAPI_ROOT%\dev-utilities\latest\include` on your development system. 
- - * Build the program using MSBuild - Open "x64 Native Tools Command Prompt for VS2017" or "x64 Native Tools Command Prompt for VS2019" - Run - MSBuild Particle_Diffusion.sln /t:Rebuild /p:Configuration="Release" - - ## Running the Sample ### Application Parameters diff --git a/DirectProgramming/DPC++/StructuredGrids/particle-diffusion/src/motionsim.cpp b/DirectProgramming/DPC++/StructuredGrids/particle-diffusion/src/motionsim.cpp index fda492d9e0..efbdb7c728 100644 --- a/DirectProgramming/DPC++/StructuredGrids/particle-diffusion/src/motionsim.cpp +++ b/DirectProgramming/DPC++/StructuredGrids/particle-diffusion/src/motionsim.cpp @@ -25,7 +25,6 @@ // #include -#include #include #include #include From d881484dfbad69be6ffad279e697842e870fab40 Mon Sep 17 00:00:00 2001 From: lqnguyen Date: Tue, 18 Aug 2020 14:03:48 -0700 Subject: [PATCH 02/17] Correct license, add buffer approach. (#83) * Add bitonic-sort sample. * Add a note about common file in README. Signed-off-by: Loc Nguyen * Move 1d_HeatTransfer sample to open source GitHub. Signed-off-by: Loc Nguyen * Updating License file to remove date * Adding Buffer Object approach. * Add comment about the location of dpc_common.hpp. * New sample: Prefix Sum. * Remove new sample. --- .../GraphTraversal/bitonic-sort/README.md | 9 +- .../bitonic-sort/src/bitonic-sort.cpp | 141 ++++++++++++++---- 2 files changed, 117 insertions(+), 33 deletions(-) diff --git a/DirectProgramming/DPC++/GraphTraversal/bitonic-sort/README.md b/DirectProgramming/DPC++/GraphTraversal/bitonic-sort/README.md index 061f753ed0..0777dc2c0b 100644 --- a/DirectProgramming/DPC++/GraphTraversal/bitonic-sort/README.md +++ b/DirectProgramming/DPC++/GraphTraversal/bitonic-sort/README.md @@ -51,7 +51,7 @@ if a compatible GPU is not detected. ## Key Implementation Details The basic DPC++ implementation explained in the code includes device selector, buffer, accessor, kernel, and command g -roups. Unified Shared Memory (USM) is used for data management. +roups. 
Unified Shared Memory (USM) and Buffer Object are used for data management. ## License This code sample is licensed under MIT license @@ -117,7 +117,10 @@ the ascending order is verified, the application will display a “Success!” m $ ./bitonic-sort 21 47 Array size: 2097152, seed: 47 Device: Intel(R) Gen9 HD Graphics NEO -Kernel time: 0.416827 sec -CPU serial time: 0.60523 sec +Warm up ... +Kernel time using USM: 0.248422 sec +Kernel time using buffer allocation: 0.253364 sec +CPU serial time: 0.628803 sec + Success! ``` diff --git a/DirectProgramming/DPC++/GraphTraversal/bitonic-sort/src/bitonic-sort.cpp b/DirectProgramming/DPC++/GraphTraversal/bitonic-sort/src/bitonic-sort.cpp index e0e4312520..0153bf4cd1 100644 --- a/DirectProgramming/DPC++/GraphTraversal/bitonic-sort/src/bitonic-sort.cpp +++ b/DirectProgramming/DPC++/GraphTraversal/bitonic-sort/src/bitonic-sort.cpp @@ -35,38 +35,93 @@ // data to the kernel. The kernel swaps the elements accordingly in parallel. // #include -#include #include +// dpc_common.hpp can be found in the dev-utilities include folder. +// e.g., $ONEAPI_ROOT/dev-utilities//include/dpc_common.hpp +#include "dpc_common.hpp" + using namespace sycl; using namespace std; -void ParallelBitonicSort(int a[], int n, queue &q) { +#define DEBUG 0 + +void ParallelBitonicSort(int data_gpu[], int n, queue &q) { // n: the exponent used to set the array size. Array size = power(2, n) int size = pow(2, n); + int* a = data_gpu; + + // step from 0, 1, 2, ...., n-1 + for (int step = 0; step < n; step++) { + // for each step s, stage goes s, s-1, ..., 0 + for (int stage = step; stage >= 0; stage--) { + int seq_len = pow(2, stage + 1); + + // Constant used in the kernel: 2**(step-stage). + int two_power = 1 << (step - stage); + // Offload the work to kernel. + q.submit([&](handler &h) { + h.parallel_for(range<1>(size), [=](id<1> i) { + // Assign the bitonic sequence number. + int seq_num = i / seq_len; + + // Variable used to identified the swapped element. 
+ int swapped_ele = -1; + + // Because the elements in the first half in the bitonic + // sequence may swap with elements in the second half, + // only the first half of elements in each sequence is + // required (seq_len/2). + int h_len = seq_len / 2; + + if (i < (seq_len * seq_num) + h_len) swapped_ele = i + h_len; + + // Check whether increasing or decreasing order. + int odd = seq_num / two_power; + + // Boolean variable used to determine "increasing" or + // "decreasing" order. + bool increasing = ((odd % 2) == 0); + + // Swap the elements in the bitonic sequence if needed + if (swapped_ele != -1) { + if (((a[i] > a[swapped_ele]) && increasing) || + ((a[i] < a[swapped_ele]) && !increasing)) { + int temp = a[i]; + a[i] = a[swapped_ele]; + a[swapped_ele] = temp; + } + } + }); + }); + q.wait(); + } // end stage + } // end step +} + +void ParallelBitonicSortBuffer(int data_gpu[], int n, queue &q) { + // n: the exponent used to set the array size. Array size = power(2, n) + int size = pow(2, n); + + buffer input (data_gpu, size); + // step from 0, 1, 2, ...., n-1 for (int step = 0; step < n; step++) { // for each step s, stage goes s, s-1, ..., 0 for (int stage = step; stage >= 0; stage--) { - // In each state, construct a number (num_seq) of bitonic sequences of - // size seq_len (2, 4, ...) num_seq stores the number of bitonic sequences - // at each stage. seq_len stores the length of the bitonic sequence at - // each stage. int seq_len = pow(2, stage + 1); -#if DEBUG - int num_seq = pow(2, (n - stage - 1)); // Used for debug purpose. - std::cout << "step num:" << step << " stage num:" << stage - << " num_seq:" << num_seq << "(" << seq_len << ") => "; -#endif + // Constant used in the kernel: 2**(step-stage). int two_power = 1 << (step - stage); // Offload the work to kernel. 
q.submit([&](handler &h) { - h.parallel_for(range<1>(size), [=](id<1> i) { + auto a = input.get_access(h); + + h.parallel_for(range<1>(size), [=](id<1> i) { // Assign the bitonic sequence number. - int seq_num = i / seq_len; + int seq_num = i / seq_len; // Variable used to identified the swapped element. int swapped_ele = -1; @@ -190,40 +245,62 @@ int main(int argc, char *argv[]) { std::cout << "Device: " << q.get_device().get_info() << "\n"; + // Memory allocated for host access only. + int *data_cpu = (int *)malloc(size * sizeof(int)); + // USM allocation using malloc_shared: data stores a sequence of random // numbers. - int *data = malloc_shared(size, q); + int *data_usm = malloc_shared(size, q); - // Memory allocated for host access only. - int *data2 = (int *)malloc(size * sizeof(int)); + // Memory allocated to store gpu results using buffer allocation + int *data_gpu = (int *)malloc(size * sizeof(int)); // Initialize the array randomly using a seed. srand(seed); - for (int i = 0; i < size; i++) data[i] = data2[i] = rand() % 1000; + for (int i = 0; i < size; i++) + data_usm[i] = data_gpu[i] = data_cpu[i] = rand() % 1000; #if DEBUG std::cout << "\ndata before:\n"; - DisplayArray(data, size); + DisplayArray(data_usm, size); #endif + // Warm up + std::cout << "Warm up ...\n"; + ParallelBitonicSort(data_usm, n, q); + // Start timer dpc_common::TimeInterval t_par; - ParallelBitonicSort(data, n, q); + // Parallel sort using USM + ParallelBitonicSort(data_usm, n, q); - std::cout << "Kernel time: " << t_par.Elapsed() << " sec\n"; + std::cout << "Kernel time using USM: " << t_par.Elapsed() << " sec\n"; #if DEBUG - std::cout << "\ndata after sorting using parallel bitonic sort:\n"; - DisplayArray(data, size); + std::cout << "\ndata_usm after sorting using parallel bitonic sort:\n"; + DisplayArray(data_usm, size); #endif + // Start timer + dpc_common::TimeInterval t_par2; + + // Parallel sort using buffer allocation + ParallelBitonicSortBuffer(data_gpu, n, q); + + 
std::cout << "Kernel time using buffer allocation: " << t_par2.Elapsed() << " sec\n"; + +#if DEBUG + std::cout << "\ndata_gpu after sorting using parallel bitonic sort:\n"; + DisplayArray(data_gpu, size); +#endif + // Start timer dpc_common::TimeInterval t_ser; // Bitonic sort in CPU (serial) - BitonicSort(data2, n); + BitonicSort(data_cpu, n); std::cout << "CPU serial time: " << t_ser.Elapsed() << " sec\n"; @@ -231,18 +308,22 @@ int main(int argc, char *argv[]) { bool pass = true; for (int i = 0; i < size - 1; i++) { // Validate the sequence order is increasing in both kernel and CPU. - if ((data[i] > data[i + 1]) || (data[i] != data2[i])) { + if ((data_usm[i] > data_usm[i + 1]) || (data_usm[i] != data_cpu[i])) { pass = false; break; } + + if ((data_gpu[i] > data_gpu[i + 1]) || (data_gpu[i] != data_cpu[i])) { + pass = false; + break; + } } - // Clean USM resources. - free(data, q); - - // Clean CPU memory. - free(data2); - + // Clean resources. + free(data_cpu); + free(data_usm, q); + free(data_gpu); + if (!pass) { std::cout << "\nFailed!\n"; return -2; From 905fc5d88aab08f73b6715926b8b3e3d26f20277 Mon Sep 17 00:00:00 2001 From: terdner Date: Tue, 18 Aug 2020 17:12:06 -0500 Subject: [PATCH 03/17] Initial pull request for openmp_reduction and dpc_reduce examples (#78) * initial commit of openMP example. Signed-off-by: todd.erdner * Initial commit of the dpc_reduce Signed-off-by: todd.erdner * added guid to sample.json Signed-off-by: todd.erdner * fixed sample.json files. Signed-off-by: todd.erdner * fixed the include files. Somehow I copied a slightly old repo and it still had and the omp_common.hpp file. They have been removed. Signed-off-by: todd.erdner * added license.txt file ran through formating tool one more time removed all calls to "std::endl" and replaced with " \n" Signed-off-by: todd.erdner * renamed license.txt to License.txt Signed-off-by: todd.erdner * added "ciTests" to the sample.json file. It passed the check. 
Signed-off-by: todd.erdner * fixed make error Signed-off-by: todd.erdner * fixed sample.json Signed-off-by: todd.erdner * removed "2020" from the License.txt file due to update guidelines. Signed-off-by: todd.erdner * added comment regarding where you can find dpc_common in both files per Paul's comments. Signed-off-by: todd.erdner * Modified names of the functions to represent what they do (ie. calc_pi_*) per suggestion from Paul. Signed-off-by: todd.erdner --- .../dpc_reduce/CMakeLists.txt | 12 + .../ParallelPatterns/dpc_reduce/License.txt | 8 + .../ParallelPatterns/dpc_reduce/README.md | 76 +++ .../ParallelPatterns/dpc_reduce/sample.json | 29 + .../dpc_reduce/src/CMakeLists.txt | 24 + .../ParallelPatterns/dpc_reduce/src/main.cpp | 519 ++++++++++++++++++ .../openmp_reduction/CMakeLists.txt | 12 + .../openmp_reduction/License.txt | 8 + .../openmp_reduction/README.md | 67 +++ .../openmp_reduction/sample.json | 29 + .../openmp_reduction/src/CMakeLists.txt | 24 + .../openmp_reduction/src/main.cpp | 106 ++++ 12 files changed, 914 insertions(+) create mode 100644 DirectProgramming/DPC++/ParallelPatterns/dpc_reduce/CMakeLists.txt create mode 100644 DirectProgramming/DPC++/ParallelPatterns/dpc_reduce/License.txt create mode 100644 DirectProgramming/DPC++/ParallelPatterns/dpc_reduce/README.md create mode 100644 DirectProgramming/DPC++/ParallelPatterns/dpc_reduce/sample.json create mode 100644 DirectProgramming/DPC++/ParallelPatterns/dpc_reduce/src/CMakeLists.txt create mode 100644 DirectProgramming/DPC++/ParallelPatterns/dpc_reduce/src/main.cpp create mode 100644 DirectProgramming/DPC++/ParallelPatterns/openmp_reduction/CMakeLists.txt create mode 100644 DirectProgramming/DPC++/ParallelPatterns/openmp_reduction/License.txt create mode 100644 DirectProgramming/DPC++/ParallelPatterns/openmp_reduction/README.md create mode 100644 DirectProgramming/DPC++/ParallelPatterns/openmp_reduction/sample.json create mode 100644 
DirectProgramming/DPC++/ParallelPatterns/openmp_reduction/src/CMakeLists.txt create mode 100644 DirectProgramming/DPC++/ParallelPatterns/openmp_reduction/src/main.cpp diff --git a/DirectProgramming/DPC++/ParallelPatterns/dpc_reduce/CMakeLists.txt b/DirectProgramming/DPC++/ParallelPatterns/dpc_reduce/CMakeLists.txt new file mode 100644 index 0000000000..f472928505 --- /dev/null +++ b/DirectProgramming/DPC++/ParallelPatterns/dpc_reduce/CMakeLists.txt @@ -0,0 +1,12 @@ +set(CMAKE_CXX_COMPILER "dpcpp") +# Set default build type to Release if not specified +if (NOT CMAKE_BUILD_TYPE) + message (STATUS "Default CMAKE_BUILD_TYPE not set using Release") + set (CMAKE_BUILD_TYPE "Release" CACHE + STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel" + FORCE) +endif() + +cmake_minimum_required (VERSION 3.0) +project(dpc_reduce LANGUAGES CXX) +add_subdirectory (src) diff --git a/DirectProgramming/DPC++/ParallelPatterns/dpc_reduce/License.txt b/DirectProgramming/DPC++/ParallelPatterns/dpc_reduce/License.txt new file mode 100644 index 0000000000..9cde07f558 --- /dev/null +++ b/DirectProgramming/DPC++/ParallelPatterns/dpc_reduce/License.txt @@ -0,0 +1,8 @@ +Copyright Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + diff --git a/DirectProgramming/DPC++/ParallelPatterns/dpc_reduce/README.md b/DirectProgramming/DPC++/ParallelPatterns/dpc_reduce/README.md new file mode 100644 index 0000000000..7a08d01177 --- /dev/null +++ b/DirectProgramming/DPC++/ParallelPatterns/dpc_reduce/README.md @@ -0,0 +1,76 @@ +# dpc_reduce Sample + +The dpc_reduce is a simple program that calculates pi. This program is implemented using C++ and Data Parallel C++ (DPC++) for Intel(R) CPU and accelerators. + + +For comprehensive instructions regarding DPC++ Programming, go to https://software.intel.com/en-us/oneapi-programming-guide and search based on relevant terms noted in the comments. + +| Optimized for | Description +| OS | Linux* Ubuntu* 18.04, +| Hardware | Skylake with GEN9 or newer, +| Software | Intel® oneAPI DPC++ Compiler (beta) +| What you will learn | how to perform reduction with oneAPI on cpu and gpu +| Time to complete | 30 min + +## Purpose +This example demonstrates how to do reduction by using the CPU in serial mode, +the CPU in parallel mode (using TBB), the GPU using direct DPC++ coding, the +GPU using multiple steps with DPC++ Library algorithms transform and reduce, +and then finally using the DPC++ Library transform_reduce algorithm. + +All the different modes use a simple calculation for Pi. It is a well known +mathematical formula that if you integrate from 0 to 1 over the function, +(4.0 / (1+x*x) )dx the answer is pi. 
One can approximate this integral +by summing up the area of a large number of rectangles over this same range. + +Each of the different functions calculates pi by breaking the range into many +tiny rectangles and then summing up the results. + +The parallel computations are performed using oneTBB and oneAPI DPC++ library +(oneDPL). + +## Key Implementation Details +The basic DPC++ implementation explained in the code includes accessor, +kernels, queues, buffers as well as some oneDPL library calls. + +## License +This code sample is licensed under MIT license. + +## Building the dpc_reduce program for CPU and GPU + +### Include Files +The include folder is located at `%ONEAPI_ROOT%\dev-utilities\latest\include` on your development system. + +### Running Samples In DevCloud +If running a sample in the Intel DevCloud, remember that you must specify the compute node (CPU, GPU, FPGA) as well as whether to run in batch or interactive mode. For more information see the Intel® oneAPI Base Toolkit Get Started Guide (https://devcloud.intel.com/oneapi/get-started/base-toolkit/) + +### On a Linux* System +Perform the following steps: +1. Build the program using the following 'cmake' commands +mkdir build +cd build +cmake .. +make + +2. Run the program using: +make run or src/dpc_reduce + +3. Clean the program using: +make clean + + +## Running the Sample +### Application Parameters +There are no editable parameters for this sample.
+ +### Example of Output +Number of steps is 1000000 +Cpu Seq calc: PI =3.14 in 0.00348 seconds +Cpu TBB calc: PI =3.14 in 0.00178 seconds +dpstd native: PI =3.14 in 0.191 seconds +dpstd native2: PI =3.14 in 0.142 seconds +dpstd native3: PI =3.14 in 0.002 seconds +dpstd native4: PI =3.14 in 0.00234 seconds +dpstd two steps: PI =3.14 in 0.00138 seconds +dpstd transform_reduce: PI =3.14 in 0.000442 seconds +success diff --git a/DirectProgramming/DPC++/ParallelPatterns/dpc_reduce/sample.json b/DirectProgramming/DPC++/ParallelPatterns/dpc_reduce/sample.json new file mode 100644 index 0000000000..b8c2f8cb72 --- /dev/null +++ b/DirectProgramming/DPC++/ParallelPatterns/dpc_reduce/sample.json @@ -0,0 +1,29 @@ + { + "guid": "ECF6C8EB-753B-4107-AF64-60662CE41726", + "name": "DPC Reduce", + "categories": ["Toolkit/Intel® oneAPI Base Toolkit/oneAPI DPC++ Compiler/oneAPI DPC++ Library/CPU and GPU"], + "description": "It models transform reduce in different ways showing capability of oneAPI.", + "toolchain": ["dpcpp"], + "languages": [{ + "cpp": {} + }], + "targetDevice": ["CPU", "GPU"], + "os": ["linux"], + "builder": ["cmake"], + "ciTests": { + "linux": [ + { + "id": "dpc_reduce", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make", + "./src/dpc_reduce" + ] + } + ] + } +} + + diff --git a/DirectProgramming/DPC++/ParallelPatterns/dpc_reduce/src/CMakeLists.txt b/DirectProgramming/DPC++/ParallelPatterns/dpc_reduce/src/CMakeLists.txt new file mode 100644 index 0000000000..cc3703162b --- /dev/null +++ b/DirectProgramming/DPC++/ParallelPatterns/dpc_reduce/src/CMakeLists.txt @@ -0,0 +1,24 @@ +if (NOT CMAKE_CXX_STANDARD) + set(CMAKE_CXX_STANDARD 14) +endif() + +if (NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE RelWithDebInfo) +endif() + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") +set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -ltbb") + +# Add an executable target from source files +add_executable(${PROJECT_NAME} main.cpp) + +if(WIN32) + # Specify libraries to link 
with + target_link_libraries(${PROJECT_NAME} sycl ) + + # Add custom target for running + add_custom_target(run ${PROJECT_NAME}.exe) +else() + # Add custom target for running + add_custom_target(run ./${PROJECT_NAME}) +endif() diff --git a/DirectProgramming/DPC++/ParallelPatterns/dpc_reduce/src/main.cpp b/DirectProgramming/DPC++/ParallelPatterns/dpc_reduce/src/main.cpp new file mode 100644 index 0000000000..25cf767a49 --- /dev/null +++ b/DirectProgramming/DPC++/ParallelPatterns/dpc_reduce/src/main.cpp @@ -0,0 +1,519 @@ +//============================================================== +// Copyright © 2020 Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +#include +#include // setprecision library +#include +#include +#include +#include +#include "dpc_common.hpp" +// Many oneAPI code samples share common include files. These +// include files are installed locally with the product installation +// and can be located at %ONEAPI_ROOT%\dev-utilities\latest\include +// on your development system. + +using namespace sycl; + +// cpu_seq is a simple sequential CPU routine +// that calculates all the slices and then +// does a reduction. +float calc_pi_cpu_seq(int num_steps) { + float step = 1.0 / (float)num_steps; + float x; + float sum = 0.0; + for (int i = 1; i < num_steps; i++) { + x = (i - 0.5) * step; + sum = sum + 4.0 / (1.0 + x * x); + } + return sum / (float)num_steps; +} + +// cpu_tbb is a simple parallel_reduce tbb routine +// that calculates all the slices and then +// uses tbb reduce to combine results. 
+float calc_pi_cpu_tbb(int num_steps) { + float step = 1.0 / (float)num_steps; + + auto tbbtotal = + tbb::parallel_reduce(tbb::blocked_range(1, num_steps), 0.0, + [=](tbb::blocked_range r, float running_total) { + float y; + for (int i = r.begin(); i != r.end(); i++) { + y = (i - 0.5) * step; + running_total += 4.0 / (1.0 + y * y); + } + return running_total; + }, + std::plus()); + return tbbtotal / (float)num_steps; +} + +// dpstd_native uses a parallel_for to fill +// a buffer with all the slice calculations and +// then uses a single_task to combine all the results +// This is not the highest performing example but shows +// how to do calculations directly in dpc++ with +// minimal complexity. +template +float calc_pi_dpstd_native(size_t num_steps, Policy&& policy) { + float step = 1.0 / (float)num_steps; + + float data[num_steps]; + + // Create buffer using host allocated "data" array + buffer buf{data, range<1>{num_steps}}; + + policy.queue().submit([&](handler& h) { + auto writeresult = buf.get_access(h); + h.parallel_for(range<1>{num_steps}, [=](id<1> idx) { + float x = ((float)idx[0] - 0.5) / (float)num_steps; + writeresult[idx[0]] = 4.0f / (1.0 + x * x); + }); + }); + policy.queue().wait(); + + // Single task is needed here to make sure + // data is not written over. + policy.queue().submit([&](handler& h) { + auto a = buf.get_access(h); + h.single_task([=]() { + for (int i = 1; i < num_steps; i++) a[0] += a[i]; + }); + }); + policy.queue().wait(); + + float mynewresult = + buf.get_access()[0] / (float)num_steps; + return mynewresult; +} + +// This option uses a parallel for to fill the array, and then use a single +// task to reduce into groups and then use cpu for final reduction.
+template +float calc_pi_dpstd_native2(size_t num_steps, Policy&& policy, int group_size) { + float step = 1.0 / (float)num_steps; + + float data[num_steps]; + float myresult = 0.0; + + // Create buffer using host allocated "data" array + buffer buf{data, range<1>{num_steps}}; + + // fill buffer with calculations + policy.queue().submit([&](handler& h) { + auto writeresult = buf.get_access(h); + h.parallel_for(range<1>{num_steps}, [=](id<1> idx) { + float x = ((float)idx[0] - 0.5) / (float)num_steps; + writeresult[idx[0]] = 4.0f / (1.0 + x * x); + }); + }); + policy.queue().wait(); + + size_t num_groups = num_steps / group_size; + float c[num_groups]; + // create a number of groups and do a local reduction + // within these groups using single_task. Store each + // result within the output of bufc + for (int i = 0; i < num_groups; i++) c[i] = 0; + buffer bufc{c, range<1>{num_groups}}; + for (int j = 0; j < num_groups; j++) { + policy.queue().submit([&](handler& h) { + auto my_a = buf.get_access(h); + auto my_c = bufc.get_access(h); + h.single_task([=]() { + for (int i = 0 + group_size * j; i < group_size + group_size * j; i++) + my_c[j] += my_a[i]; + }); + }); + } + policy.queue().wait(); + + auto src = bufc.get_access(); + + // Sum up results on CPU + float mynewresult = 0.0; + for (int i = 0; i < num_groups; i++) mynewresult += src[i]; + + return mynewresult / (float)num_steps; +} + +// Function operator used as transform operation in transform-reduce operations +// implemented below. +struct my_no_op { + template + Tp&& operator()(Tp&& a) const { + return std::forward(a); + } +}; + +// Structure slice area performs the calculations for +// each rectangle that will be summed up. 
+struct slice_area { + int num; + slice_area(int num_steps) { num = num_steps; } + + template + float operator()(T&& i) { + float x = ((float)i - 0.5) / (float)num; + return 4.0f / (1.0f + (x * x)); + }; +}; + +// This option uses a parallel for to fill the buffer and then +// uses a transform_init with plus/no_op and then +// a local reduction then global reduction. +template +float calc_pi_dpstd_native3(size_t num_steps, int groups, Policy&& policy) { + float data[num_steps]; + + // Create buffer using host allocated "data" array + buffer buf{data, range<1>{num_steps}}; + + // fill the buffer with the calculation using parallel for + policy.queue().submit([&](handler& h) { + auto writeresult = buf.get_access(h); + h.parallel_for(range<1>{num_steps}, [=](id<1> idx) { + float x = (float)idx[0] / (float)num_steps; + writeresult[idx[0]] = 4.0f / (1.0f + x * x); + }); + }); + policy.queue().wait(); + + // Calc_begin and calc_end are iterators pointing to + // beginning and end of the buffer + auto calc_begin = oneapi::dpl::begin(buf); + auto calc_end = oneapi::dpl::end(buf); + + using Functor = oneapi::dpl::unseq_backend::walk_n; + float result; + + // Functor will do nothing for transform_init and will use plus for reduce. + // In this example we have done the calculation and filled the buffer above + // The way transform_init works is that you need to have the value already + // populated in the buffer. + auto tf_init = + oneapi::dpl::unseq_backend::transform_init, + Functor>{std::plus(), + Functor{my_no_op()}}; + + auto combine = std::plus(); + auto brick_reduce = + oneapi::dpl::unseq_backend::reduce, float>{ + std::plus()}; + auto workgroup_size = + policy.queue() + .get_device() + .template get_info(); + auto max_comp_u = policy.queue() + .get_device() + .template get_info(); + auto n_groups = (num_steps - 1) / workgroup_size + 1; + n_groups = + std::min(decltype(n_groups)(max_comp_u), + n_groups); // make groups max number of compute units or less + + // 0.
Create temporary global buffer to store temporary value + auto temp_buf = buffer(range<1>(n_groups)); + // 1. Reduce over each work_group + auto local_reduce_event = + policy.queue().submit([&buf, &temp_buf, &brick_reduce, &tf_init, + num_steps, n_groups, workgroup_size](handler& h) { + auto access_buf = buf.template get_access(h); + auto temp_acc = + temp_buf.template get_access(h); + // Create temporary local buffer + accessor + temp_buf_local(range<1>(workgroup_size), h); + h.parallel_for(nd_range<1>(range<1>(n_groups * workgroup_size), + range<1>(workgroup_size)), + [=](nd_item<1> item_id) mutable { + auto global_idx = item_id.get_global_id(0); + // 1. Initialization (transform part). + tf_init(item_id, global_idx, access_buf, num_steps, + temp_buf_local); + // 2. Reduce within work group + float local_result = brick_reduce( + item_id, global_idx, num_steps, temp_buf_local); + if (item_id.get_local_id(0) == 0) { + temp_acc[item_id.get_group(0)] = local_result; + } + }); + }); + + // 2. global reduction + auto reduce_event = local_reduce_event; + if (n_groups > 1) { + auto countby2 = decltype(n_groups)(1); + do { + reduce_event = policy.queue().submit([&reduce_event, &temp_buf, &combine, + countby2, n_groups](handler& h) { + h.depends_on(reduce_event); + auto temp_acc = + temp_buf.template get_access(h); + h.parallel_for(range<1>(n_groups), [=](item<1> item_id) mutable { + auto global_idx = item_id.get_linear_id(); + + if (global_idx % (2 * countby2) == 0 && + global_idx + countby2 < n_groups) { + temp_acc[global_idx] = + combine(temp_acc[global_idx], temp_acc[global_idx + countby2]); + } + }); + }); + countby2 *= 2; + } while (countby2 < n_groups); + } + + float answer = temp_buf.template get_access()[0]; + result = answer / (float)num_steps; + return result; +} + +// dpstd_native4 fills a buffer with number 1...num_steps and then +// calls transform_init to calculate the slices and then +// does a reduction in two steps - global and then local. 
+template +float calc_pi_dpstd_native4(size_t num_steps, int groups, Policy&& policy) { + std::vector data(num_steps); + float result = 0.0; + + buffer buf2{data.data(), range<1>{num_steps}}; + + // fill buffer with 1...num_steps + policy.queue().submit([&](handler& h) { + auto writeresult = buf2.get_access(h); + h.parallel_for(range<1>{num_steps}, + [=](id<1> idx) { writeresult[idx[0]] = (float)idx[0]; }); + }); + policy.queue().wait(); + + auto calc_begin = oneapi::dpl::begin(buf2); + auto calc_end = oneapi::dpl::end(buf2); + + using Functor2 = oneapi::dpl::unseq_backend::walk_n; + + // The buffer has 1...num it at and now we will use that as an input + // to the slice structue which will calculate the area of each + // rectangle. + auto tf_init = + oneapi::dpl::unseq_backend::transform_init, + Functor2>{ + std::plus(), Functor2{slice_area(num_steps)}}; + + auto combine = std::plus(); + auto brick_reduce = + oneapi::dpl::unseq_backend::reduce, float>{ + std::plus()}; + + // get workgroup_size from the device + auto workgroup_size = + policy.queue() + .get_device() + .template get_info(); + + // get number of compute units from device. 
+ auto max_comp_u = policy.queue() + .get_device() + .template get_info(); + + auto n_groups = (num_steps - 1) / workgroup_size + 1; + + // use the smaller of the number of workgroups device has or the + // number of steps/workgroups + n_groups = std::min(decltype(n_groups)(max_comp_u), n_groups); + + // Create temporary global buffer to store temporary value + auto temp_buf = buffer(range<1>(n_groups)); + + // Reduce over each work_group + auto local_reduce_event = + policy.queue().submit([&buf2, &temp_buf, &brick_reduce, &tf_init, + num_steps, n_groups, workgroup_size](handler& h) { + // grab access to the previous input + auto access_buf = buf2.template get_access(h); + auto temp_acc = + temp_buf.template get_access(h); + // Create temporary local buffer + accessor + temp_buf_local(range<1>(workgroup_size), h); + h.parallel_for(nd_range<1>(range<1>(n_groups * workgroup_size), + range<1>(workgroup_size)), + [=](nd_item<1> item_id) mutable { + auto global_idx = item_id.get_global_id(0); + // 1. Initialization (transform part). Fill local + // memory + tf_init(item_id, global_idx, access_buf, num_steps, + temp_buf_local); + // 2. 
Reduce within work group + float local_result = brick_reduce( + item_id, global_idx, num_steps, temp_buf_local); + if (item_id.get_local_id(0) == 0) { + temp_acc[item_id.get_group(0)] = local_result; + } + }); + }); + + // global reduction + auto reduce_event = local_reduce_event; + if (n_groups > 1) { + auto countby2 = decltype(n_groups)(1); + do { + reduce_event = policy.queue().submit([&reduce_event, &temp_buf, &combine, + countby2, n_groups](handler& h) { + h.depends_on(reduce_event); + auto temp_acc = + temp_buf.template get_access(h); + h.parallel_for(range<1>(n_groups), [=](item<1> item_id) mutable { + auto global_idx = item_id.get_linear_id(); + + if (global_idx % (2 * countby2) == 0 && + global_idx + countby2 < n_groups) { + temp_acc[global_idx] = + combine(temp_acc[global_idx], temp_acc[global_idx + countby2]); + } + }); + }); + countby2 *= 2; + } while (countby2 < n_groups); + } + float answer = temp_buf.template get_access()[0]; + result = answer / (float)num_steps; + + return result; +} + +// This function shows the use of two different DPC++ library calls. +// The first is a transform calls which will fill a buff with the +// calculations of each small rectangle. The second call is the reduce +// call which sums up the results of all the elements in the buffer. +template +float calc_pi_dpstd_two_steps_lib(int num_steps, Policy&& policy) { + float step = 1.0 / (float)num_steps; + + buffer calc_values{num_steps}; + auto calc_begin2 = oneapi::dpl::begin(calc_values); + auto calc_end2 = oneapi::dpl::end(calc_values); + + // use DPC++ library call transform to fill the buffer with + // the area calculations for each rectangle. 
+ std::transform(policy, oneapi::dpl::counting_iterator(1), + oneapi::dpl::counting_iterator(num_steps), calc_begin2, + [=](int i) { + float x = (((float)i - 0.5f) / (float)(num_steps)); + return (4.0f / (1.0f + x * x)); + }); + + policy.queue().wait(); + + // use the DPC++ library call to reduce the array using plus + float result = + std::reduce(policy, calc_begin2, calc_end2, 0.0f, std::plus()); + policy.queue().wait(); + + result = result / (float)num_steps; + + return result; +} + +// This function uses the DPC++ library call +// transform reduce. It does everything in one library +// call. +template +float calc_pi_dpstd_onestep(int num_steps, Policy& policy) { + float step = 1.0f / (float)num_steps; + + float total = std::transform_reduce( + policy, oneapi::dpl::counting_iterator(1), + oneapi::dpl::counting_iterator(num_steps), 0.0f, std::plus(), + [=](int i) { + float x = (float)(((float)i - 0.5f) / (float(num_steps))); + return (4.0f / (1.0f + x * x)); + }); + total = total * (float)step; + + return total; +} + +int main(int argc, char** argv) { + int num_steps = 1000000; + printf("Number of steps is %d\n", num_steps); + int groups = 10000; + + float pi; + queue myQueue{property::queue::in_order()}; + auto policy = oneapi::dpl::execution::make_device_policy( + queue(default_selector{}, dpc_common::exception_handler)); + + // Since we are using JIT compiler for samples, + // we need to run each step once to allow for compile + // to occur before we time execution of function. 
+ pi = calc_pi_dpstd_native(num_steps, policy); + pi = calc_pi_dpstd_native2(num_steps, policy, groups); + pi = calc_pi_dpstd_native3(num_steps, groups, policy); + pi = calc_pi_dpstd_native4(num_steps, groups, policy); + + pi = calc_pi_dpstd_two_steps_lib(num_steps, policy); + pi = calc_pi_dpstd_onestep(num_steps, policy); + + dpc_common::TimeInterval T; + pi = calc_pi_cpu_seq(num_steps); + auto stop = T.Elapsed(); + std::cout << "Cpu Seq calc: \t\t"; + std::cout << std::setprecision(3) << "PI =" << pi; + std::cout << " in " << stop << " seconds\n"; + + dpc_common::TimeInterval T2; + pi = calc_pi_cpu_tbb(num_steps); + auto stop2 = T2.Elapsed(); + std::cout << "Cpu TBB calc: \t\t"; + std::cout << std::setprecision(3) << "PI =" << pi; + std::cout << " in " << stop2 << " seconds\n"; + + dpc_common::TimeInterval T3; + pi = calc_pi_dpstd_native(num_steps, policy); + auto stop3 = T3.Elapsed(); + std::cout << "dpstd native:\t\t"; + std::cout << std::setprecision(3) << "PI =" << pi; + std::cout << " in " << stop3 << " seconds\n"; + + dpc_common::TimeInterval T3a; + pi = calc_pi_dpstd_native2(num_steps, policy, groups); + auto stop3a = T3a.Elapsed(); + std::cout << "dpstd native2:\t\t"; + std::cout << std::setprecision(3) << "PI =" << pi; + std::cout << " in " << stop3a << " seconds\n"; + + dpc_common::TimeInterval T3b; + pi = calc_pi_dpstd_native3(num_steps, groups, policy); + auto stop3b = T3b.Elapsed(); + std::cout << "dpstd native3:\t\t"; + std::cout << std::setprecision(3) << "PI =" << pi; + std::cout << " in " << stop3b << " seconds\n"; + + dpc_common::TimeInterval T3c; + pi = calc_pi_dpstd_native4(num_steps, groups, policy); + auto stop3c = T3c.Elapsed(); + std::cout << "dpstd native4:\t\t"; + std::cout << std::setprecision(3) << "PI =" << pi; + std::cout << " in " << stop3c << " seconds\n"; + + dpc_common::TimeInterval T4; + pi = calc_pi_dpstd_two_steps_lib(num_steps, policy); + auto stop4 = T4.Elapsed(); + std::cout << "dpstd two steps:\t"; + std::cout << 
std::setprecision(3) << "PI =" << pi; + std::cout << " in " << stop4 << " seconds\n"; + + dpc_common::TimeInterval T5; + pi = calc_pi_dpstd_onestep(num_steps, policy); + auto stop5 = T5.Elapsed(); + std::cout << "dpstd transform_reduce: "; + std::cout << std::setprecision(3) << "PI =" << pi; + std::cout << " in " << stop5 << " seconds\n"; + + std::cout << "success\n"; + return 0; +} diff --git a/DirectProgramming/DPC++/ParallelPatterns/openmp_reduction/CMakeLists.txt b/DirectProgramming/DPC++/ParallelPatterns/openmp_reduction/CMakeLists.txt new file mode 100644 index 0000000000..069c03849e --- /dev/null +++ b/DirectProgramming/DPC++/ParallelPatterns/openmp_reduction/CMakeLists.txt @@ -0,0 +1,12 @@ +set(CMAKE_CXX_COMPILER "icpx") +# Set default build type to RelWithDebInfo if not specified +if (NOT CMAKE_BUILD_TYPE) + message (STATUS "Default CMAKE_BUILD_TYPE not set using Release") + set (CMAKE_BUILD_TYPE "Release" CACHE + STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel" + FORCE) +endif() + +cmake_minimum_required (VERSION 3.0) +project(openmp_reduction LANGUAGES CXX) +add_subdirectory (src) diff --git a/DirectProgramming/DPC++/ParallelPatterns/openmp_reduction/License.txt b/DirectProgramming/DPC++/ParallelPatterns/openmp_reduction/License.txt new file mode 100644 index 0000000000..9cde07f558 --- /dev/null +++ b/DirectProgramming/DPC++/ParallelPatterns/openmp_reduction/License.txt @@ -0,0 +1,8 @@ +Copyright Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be 
included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + diff --git a/DirectProgramming/DPC++/ParallelPatterns/openmp_reduction/README.md b/DirectProgramming/DPC++/ParallelPatterns/openmp_reduction/README.md new file mode 100644 index 0000000000..3836e7fc0e --- /dev/null +++ b/DirectProgramming/DPC++/ParallelPatterns/openmp_reduction/README.md @@ -0,0 +1,67 @@ +# openmp_reduction Sample + +The openmp_reduction sample is a simple program that calculates pi. This program is implemented using C++ and openMP for Intel(R) CPU and accelerators. + +For comprehensive instructions regarding DPC++ Programming, go to https://software.intel.com/en-us/oneapi-programming-guide and search based on relevant terms noted in the comments. + +| Optimized for | Description +| OS | Linux* Ubuntu* 18.04, +| Hardware | Skylake with GEN9 or newer +| Software | Intel® oneAPI DPC++ Compiler (beta) +| What you will learn | How to run openMP on cpu as well as GPU offload +| Time to complete | 10 min + +## Purpose +This example demonstrates how to do reduction by using the CPU in serial mode, +the CPU in parallel mode (using openMP), the GPU using openMP offloading. + +All the different modes use a simple calculation for Pi. It is a well known +mathematical formula that if you integrate from 0 to 1 over the function, +(4.0 / (1+x*x) )dx the answer is pi. One can approximate this integral +by summing up the area of a large number of rectangles over this same range. 
+ +Each of the different functions calculates pi by breaking the range into many +tiny rectangles and then summing up the results. + +## Key Implementation Details +This code shows how to use OpenMP on the CPU host as well as using target offload capabilities. + +## License +This code sample is licensed under the MIT license. + +## Building the openmp_reduction program for CPU and GPU + +### Include Files +The include folder is located at `%ONEAPI_ROOT%\dev-utilities\latest\include` on your development system. + +### Running Samples In DevCloud +If running a sample in the Intel DevCloud, remember that you must specify the compute node (CPU, GPU, FPGA) as well as whether to run in batch or interactive mode. For more information see the Intel® oneAPI Base Toolkit Get Started Guide (https://devcloud.intel.com/oneapi/get-started/base-toolkit/) + +### On a Linux* System +Perform the following steps: + +mkdir build +cd build +cmake .. + +1. Build the program using the following make commands +make + +2. Run the program using: +make run or src/openmp_reduction + +3. Clean the program using: +make clean + + +## Running the Sample + +### Application Parameters +There are no editable parameters for this sample. 
+ +### Example of Output (result vary depending on hardware) +Number of steps is 1000000 +Cpu Seq calc: PI =3.14 in 0.00105 seconds +Host OpenMP: PI =3.14 in 0.0010 seconds +Offload OpenMP: PI =3.14 in 0.0005 seconds +success diff --git a/DirectProgramming/DPC++/ParallelPatterns/openmp_reduction/sample.json b/DirectProgramming/DPC++/ParallelPatterns/openmp_reduction/sample.json new file mode 100644 index 0000000000..78b550e82c --- /dev/null +++ b/DirectProgramming/DPC++/ParallelPatterns/openmp_reduction/sample.json @@ -0,0 +1,29 @@ + { + "guid": "ECF6C8EB-753B-4107-AF64-60662CE41726", + "name": "DPC Reduce", + "categories": ["Toolkit/Intel® oneAPI Base Toolkit/oneAPI DPC++ Compiler/oneAPI DPC++ Library/CPU and GPU"], + "description": "It models transform reduce in different ways showing capability of oneAPI.", + "toolchain": ["dpcpp"], + "languages": [{ + "cpp": {} + }], + "targetDevice": ["CPU", "GPU"], + "os": ["linux"], + "builder": ["cmake"], + "ciTests": { + "linux": [ + { + "id": "dpc_reduce", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make ", + "./src/openmp_reduction" + ] + } + ] + } +} + + diff --git a/DirectProgramming/DPC++/ParallelPatterns/openmp_reduction/src/CMakeLists.txt b/DirectProgramming/DPC++/ParallelPatterns/openmp_reduction/src/CMakeLists.txt new file mode 100644 index 0000000000..90721a5f66 --- /dev/null +++ b/DirectProgramming/DPC++/ParallelPatterns/openmp_reduction/src/CMakeLists.txt @@ -0,0 +1,24 @@ +if (NOT CMAKE_CXX_STANDARD) + set(CMAKE_CXX_STANDARD 14) +endif() + +if (NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE RelWithDebInfo) +endif() + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fiopenmp -fopenmp-targets=spir64 -fsycl") +set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}") + +# Add an executable target from source files +add_executable(${PROJECT_NAME} main.cpp) + +if(WIN32) + # Specify libraries to link with + target_link_libraries(${PROJECT_NAME} sycl ) + + # Add custom target for running + add_custom_target(run 
${PROJECT_NAME}.exe) +else() + # Add custom target for running + add_custom_target(run ./${PROJECT_NAME}) +endif() diff --git a/DirectProgramming/DPC++/ParallelPatterns/openmp_reduction/src/main.cpp b/DirectProgramming/DPC++/ParallelPatterns/openmp_reduction/src/main.cpp new file mode 100644 index 0000000000..b36aae7ab5 --- /dev/null +++ b/DirectProgramming/DPC++/ParallelPatterns/openmp_reduction/src/main.cpp @@ -0,0 +1,106 @@ +//============================================================== +// Copyright © 2020 Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +#include // setprecision library +#include +#include "dpc_common.hpp" +// Many oneAPI code samples share common include files. These +// include files are installed locally with the product installation +// and can be located at %ONEAPI_ROOT%\dev-utilities\latest\include +// on your development system. + + +// cpu_seq_calc_pi is a simple sequential CPU routine +// that calculates all the slices and then +// does a reduction. +float cpu_seq_calc_pi(int num_steps) { + float step = 1.0 / (float)num_steps; + float x; + float pi; + float sum = 0.0; + for (int i = 1; i < num_steps; i++) { + x = ((float)i - 0.5f) * step; + sum = sum + 4.0f / (1.0f + x * x); + } + pi = sum * step; + return pi; +} + +// openmp_host_calc_pi is a simple parallel +// calcuation that uses openmp running +// on the host. By default openmp +// will use all the cores available +// and execute the code in parallel and +// then perform a reduction. 
+float openmp_host_calc_pi(int num_steps) { + float step = (1.0f / num_steps); + float pi = 0.0; + float sum = 0.0; +#pragma omp parallel for reduction(+ : sum) + for (int i = 1; i < num_steps; i++) { + float x = ((float)i - 0.5f) * step; + sum = sum + 4.0f / (1.0f + x * x); + } + pi = step * sum; + return pi; +} + +// openmp_device_calc_pi is a simple parallel +// calcuation that uses openmp running +// on the device through the use of the +// target specifier. +// This will execute the code in parallel. + +float openmp_device_calc_pi(int num_steps) { + float pi = 0.0; + float step = (1.0f / num_steps); + float sum = 0.0; +#pragma omp target teams distribute parallel for reduction(+ : sum) + for (int i = 1; i < num_steps; i++) { + float x = ((float)i - 0.5f) * step; + sum = sum + 4.0f / (1.0 + x * x); + } + pi = sum * step; + return pi; +} + +int main(int argc, char** argv) { + int num_steps = 1000000; + printf("Number of steps is %d\n", num_steps); + float pi; + + // Due to the overhead associated with + // JIT, run the offload calculation once + // that allows code to be compiled. Execution + // time is measured the 2nd time you run it. 
+ pi = openmp_device_calc_pi(num_steps); + + dpc_common::TimeInterval T; + pi = cpu_seq_calc_pi(num_steps); + auto stop = T.Elapsed(); + std::cout << "Cpu Seq calc: \t\t"; + std::cout << std::setprecision(3) << "PI =" << pi; + std::cout << " in " << stop << " seconds" + << "\n"; + + dpc_common::TimeInterval T2; + pi = openmp_host_calc_pi(num_steps); + auto stop2 = T2.Elapsed(); + std::cout << "Host OpenMP:\t\t"; + std::cout << std::setprecision(3) << "PI =" << pi; + std::cout << " in " << stop2 << " seconds" + << "\n"; + + dpc_common::TimeInterval T3; + pi = openmp_device_calc_pi(num_steps); + auto stop3 = T3.Elapsed(); + std::cout << "Offload OpenMP:\t\t"; + std::cout << std::setprecision(3) << "PI =" << pi; + std::cout << " in " << stop3 << " seconds" + << "\n"; + + std::cout << "success\n"; + return 0; +} From 0fb986f09dde4c74fe2d9e300be3b4e1e9514624 Mon Sep 17 00:00:00 2001 From: ethanhirsch <67659250+ethanhirsch@users.noreply.github.com> Date: Tue, 18 Aug 2020 15:46:46 -0700 Subject: [PATCH 04/17] DirectProgramming samples README/samples.json - Updated DPC++/C++ Compiler labels (#88) * update mandelbrot Signed-off-by: Ethan Hirsch * update sepia filter Signed-off-by: Ethan Hirsch * update complex mult Signed-off-by: Ethan Hirsch * udate matrix mult Signed-off-by: Ethan Hirsch * update simple add Signed-off-by: Ethan Hirsch * update vector add Signed-off-by: Ethan Hirsch * update bitonic sort Signed-off-by: Ethan Hirsch * update dct Signed-off-by: Ethan Hirsch * update 1d heat transfer Signed-off-by: Ethan Hirsch * update iso2d/3d Signed-off-by: Ethan Hirsch * Update missed spots Signed-off-by: Ethan Hirsch * updated icc refs as well Signed-off-by: Ethan Hirsch --- .../DPC++/CombinationalLogic/mandelbrot/README.md | 4 ++-- .../DPC++/CombinationalLogic/sepia-filter/README.md | 4 ++-- .../DPC++/CombinationalLogic/sepia-filter/sample.json | 2 +- .../DPC++/DenseLinearAlgebra/complex_mult/README.md | 2 +- .../DPC++/DenseLinearAlgebra/complex_mult/sample.json | 2 +- 
.../DPC++/DenseLinearAlgebra/matrix_mul/README.md | 2 +- .../DPC++/DenseLinearAlgebra/simple-add/README.md | 2 +- .../DPC++/DenseLinearAlgebra/simple-add/sample.json | 2 +- .../DPC++/DenseLinearAlgebra/vector-add/README.md | 2 +- .../DPC++/DenseLinearAlgebra/vector-add/sample.json | 2 +- .../DPC++/GraphTraversal/bitonic-sort/README.md | 4 ++-- .../DPC++/GraphTraversal/bitonic-sort/sample.json | 2 +- .../DPC++/SpectralMethods/DiscreteCosineTransform/README.md | 2 +- .../SpectralMethods/DiscreteCosineTransform/sample.json | 2 +- .../DPC++/StructuredGrids/1d_HeatTransfer/README.md | 4 ++-- .../DPC++/StructuredGrids/iso2dfd_dpcpp/README.md | 4 ++-- .../DPC++/StructuredGrids/iso3dfd_dpcpp/README.md | 6 +++--- .../DPC++/StructuredGrids/particle-diffusion/README.md | 6 +++--- 18 files changed, 27 insertions(+), 27 deletions(-) diff --git a/DirectProgramming/DPC++/CombinationalLogic/mandelbrot/README.md b/DirectProgramming/DPC++/CombinationalLogic/mandelbrot/README.md index 312bb4e783..53da36b8b1 100644 --- a/DirectProgramming/DPC++/CombinationalLogic/mandelbrot/README.md +++ b/DirectProgramming/DPC++/CombinationalLogic/mandelbrot/README.md @@ -8,8 +8,8 @@ For comprehensive instructions regarding DPC++ Programming, go to https://softwa |:--- |:--- | OS | Linux* Ubuntu* 18.04; Windows 10 | Hardware | Skylake with GEN9 or newer -| Software | Intel® oneAPI DPC++ Compiler beta; -| What you will learn | How to offload the computation to GPU using Intel DPC++ compiler +| Software | Intel® oneAPI DPC++/C++ Compiler +| What you will learn | How to offload the computation to GPU using the Intel® oneAPI DPC++/C++ Compiler | Time to complete | 15 minutes ## Purpose diff --git a/DirectProgramming/DPC++/CombinationalLogic/sepia-filter/README.md b/DirectProgramming/DPC++/CombinationalLogic/sepia-filter/README.md index 759b7e1576..db05d53647 100644 --- a/DirectProgramming/DPC++/CombinationalLogic/sepia-filter/README.md +++ 
b/DirectProgramming/DPC++/CombinationalLogic/sepia-filter/README.md @@ -7,8 +7,8 @@ For comprehensive instructions regarding DPC++ Programming, go to https://softwa |:--- |:--- | OS | Linux Ubuntu 18.04, Windows 10 | Hardware | Skylake with GEN9 or newer -| Software | Intel® oneAPI DPC++ Compiler (beta) -| What you will learn | The Sepia Filter sample demonstrates the following using the oneAPI DPC++ compiler
  • Writing a custom device selector class
  • Offloading compute intensive parts of the application using both lamba and functor kernels
  • Measuring kernel execution time by enabling profiling
+| Software | Intel® oneAPI DPC++/C++ Compiler +| What you will learn | The Sepia Filter sample demonstrates the following using the Intel® oneAPI DPC++/C++ Compiler
  • Writing a custom device selector class
  • Offloading compute intensive parts of the application using both lambda and functor kernels
  • Measuring kernel execution time by enabling profiling
| Time to complete | 20 minutes ## Purpose diff --git a/DirectProgramming/DPC++/CombinationalLogic/sepia-filter/sample.json b/DirectProgramming/DPC++/CombinationalLogic/sepia-filter/sample.json index 6abd3d250f..e2ff514d31 100644 --- a/DirectProgramming/DPC++/CombinationalLogic/sepia-filter/sample.json +++ b/DirectProgramming/DPC++/CombinationalLogic/sepia-filter/sample.json @@ -1,7 +1,7 @@ { "guid": "B9C425DB-A3AD-4FCB-9CA0-1909E5189FB7", "name": "Sepia Filter", - "categories": ["Toolkit/Intel® oneAPI Base Toolkit/oneAPI DPC++ Compiler/CPU and GPU"], + "categories": ["Toolkit/Intel® oneAPI Base Toolkit/Intel® oneAPI DPC++/C++ Compiler/CPU and GPU"], "toolchain": ["dpcpp"], "description": "A program that converts an image to sepia tone", "languages": [{ diff --git a/DirectProgramming/DPC++/DenseLinearAlgebra/complex_mult/README.md b/DirectProgramming/DPC++/DenseLinearAlgebra/complex_mult/README.md index 4ef647b606..246791a6fd 100644 --- a/DirectProgramming/DPC++/DenseLinearAlgebra/complex_mult/README.md +++ b/DirectProgramming/DPC++/DenseLinearAlgebra/complex_mult/README.md @@ -11,7 +11,7 @@ custom types of classes in a DPC++ program |:--- |:--- | OS | Linux Ubuntu 18.04, Windows 10 | Hardware | Skylake with GEN9 or newer -| Software | Intel® oneAPI DPC++ Compiler (beta) +| Software | Intel® oneAPI DPC++/C++ Compiler | What you will learn | Using custom type classes and offloads complex number computations to GPU using Intel DPC++ | Time to complete | 15 minutes diff --git a/DirectProgramming/DPC++/DenseLinearAlgebra/complex_mult/sample.json b/DirectProgramming/DPC++/DenseLinearAlgebra/complex_mult/sample.json index 5b2c4309a1..2824cf8808 100644 --- a/DirectProgramming/DPC++/DenseLinearAlgebra/complex_mult/sample.json +++ b/DirectProgramming/DPC++/DenseLinearAlgebra/complex_mult/sample.json @@ -1,7 +1,7 @@ { "guid": "D725E06E-0ECE-44F8-910D-AD1A8C89ED89", "name": "Complex number Multiplication", - "categories": [ "Toolkit/Intel® oneAPI Base Toolkit/oneAPI DPC++ 
Compiler/CPU and GPU" ], + "categories": [ "Toolkit/Intel® oneAPI Base Toolkit/Intel® oneAPI DPC++/C++ Compiler/CPU and GPU" ], "description": "program that computes the multiplication of a Complex number", "toolchain": [ "dpcpp" ], "languages": [ { "cpp": { "properties": { "projectOptions": [ { "projectType": "makefile" } ] } } } ], diff --git a/DirectProgramming/DPC++/DenseLinearAlgebra/matrix_mul/README.md b/DirectProgramming/DPC++/DenseLinearAlgebra/matrix_mul/README.md index c50970d237..34eebdfd9e 100644 --- a/DirectProgramming/DPC++/DenseLinearAlgebra/matrix_mul/README.md +++ b/DirectProgramming/DPC++/DenseLinearAlgebra/matrix_mul/README.md @@ -10,7 +10,7 @@ For comprehensive instructions regarding DPC++ Programming, go to https://softwa |:--- |:--- | OS | Linux* Ubuntu* 18.04, Windows 10* | Hardware | Skylake with GEN9 or newer -| Software | Intel® oneAPI DPC++ Compiler beta, Intel® C/C++ Compiler beta +| Software | Intel® oneAPI DPC++/C++ Compiler, Intel® oneAPI C++ Compiler Classic | What you will learn | Offloads computations on 2D arrays to GPU using Intel DPC++ and OpenMP | Time to complete | 15 minutes diff --git a/DirectProgramming/DPC++/DenseLinearAlgebra/simple-add/README.md b/DirectProgramming/DPC++/DenseLinearAlgebra/simple-add/README.md index 662c9df298..7c156e79f7 100644 --- a/DirectProgramming/DPC++/DenseLinearAlgebra/simple-add/README.md +++ b/DirectProgramming/DPC++/DenseLinearAlgebra/simple-add/README.md @@ -8,7 +8,7 @@ For comprehensive instructions regarding DPC++ Programming, go to https://softwa |:--- |:--- | OS | Linux* Ubuntu* 18.04, Windows 10 | Hardware | Skylake with GEN9 or newer, Intel(R) Programmable Acceleration Card with Intel(R) Arria(R) 10 GX FPGA -| Software | Intel® oneAPI DPC++ Compiler (beta) +| Software | Intel® oneAPI DPC++/C++ Compiler diff --git a/DirectProgramming/DPC++/DenseLinearAlgebra/simple-add/sample.json b/DirectProgramming/DPC++/DenseLinearAlgebra/simple-add/sample.json index cb7d58bb6a..619d872475 100644 --- 
a/DirectProgramming/DPC++/DenseLinearAlgebra/simple-add/sample.json +++ b/DirectProgramming/DPC++/DenseLinearAlgebra/simple-add/sample.json @@ -1,7 +1,7 @@ { "guid" : "49C65CB6-F9FA-4E3C-B8BE-4A141E4E0F07", "name": "Simple Add", - "categories": ["Toolkit/Get Started", "Toolkit/Intel® oneAPI Base Toolkit/oneAPI DPC++ Compiler/CPU, GPU and FPGA"], + "categories": ["Toolkit/Get Started", "Toolkit/Intel® oneAPI Base Toolkit/Intel® oneAPI DPC++/C++ Compiler/CPU, GPU and FPGA"], "description": "Simple program that adds two large vectors in parallel. Provides a ‘Hello World!’ like sample to ensure your environment is setup correctly using Data Parallel C++.", "toolchain": ["dpcpp"], "languages": [{"cpp": {"properties": {"projectOptions": [{"projectType": "makefile"}]}}}], diff --git a/DirectProgramming/DPC++/DenseLinearAlgebra/vector-add/README.md b/DirectProgramming/DPC++/DenseLinearAlgebra/vector-add/README.md index 9f32169505..ba8a52deaa 100644 --- a/DirectProgramming/DPC++/DenseLinearAlgebra/vector-add/README.md +++ b/DirectProgramming/DPC++/DenseLinearAlgebra/vector-add/README.md @@ -8,7 +8,7 @@ For comprehensive instructions regarding DPC++ Programming, go to https://softwa |:--- |:--- | OS | Linux* Ubuntu* 18.04, Windows 10 | Hardware | Skylake with GEN9 or newer, Intel(R) Programmable Acceleration Card with Intel(R) Arria(R) 10 GX FPGA -| Software | Intel® oneAPI DPC++ Compiler (beta) +| Software | Intel® oneAPI DPC++/C++ Compiler ## Purpose The `vector-add` is a simple program that adds two large vectors of integers and verifies the results. This program is implemented using C++ and Data Parallel C++ (DPC++) for Intel(R) CPU and accelerators. 
diff --git a/DirectProgramming/DPC++/DenseLinearAlgebra/vector-add/sample.json b/DirectProgramming/DPC++/DenseLinearAlgebra/vector-add/sample.json index 9737eea2fb..f86a214617 100644 --- a/DirectProgramming/DPC++/DenseLinearAlgebra/vector-add/sample.json +++ b/DirectProgramming/DPC++/DenseLinearAlgebra/vector-add/sample.json @@ -1,7 +1,7 @@ { "guid":"b1b58be7-e22e-4ca2-ba59-6887b2f1be6c", "name": "Vector Add", - "categories": ["Toolkit/Get Started", "Toolkit/Intel® oneAPI Base Toolkit/oneAPI DPC++ Compiler/CPU, GPU and FPGA"], + "categories": ["Toolkit/Get Started", "Toolkit/Intel® oneAPI Base Toolkit/Intel® oneAPI DPC++/C++ Compiler/CPU, GPU and FPGA"], "description": "Simple program that adds two large vectors in parallel. Provides a ‘Hello World!’ like sample to ensure your environment is setup correctly using simple Data Parallel C++.", "toolchain": ["dpcpp"], "languages": [{"cpp": {"properties": {"projectOptions": [{"projectType": "makefile"}]}}}], diff --git a/DirectProgramming/DPC++/GraphTraversal/bitonic-sort/README.md b/DirectProgramming/DPC++/GraphTraversal/bitonic-sort/README.md index 0777dc2c0b..3e28e7c495 100644 --- a/DirectProgramming/DPC++/GraphTraversal/bitonic-sort/README.md +++ b/DirectProgramming/DPC++/GraphTraversal/bitonic-sort/README.md @@ -13,8 +13,8 @@ and search based on relevant terms noted in the comments. 
|:--- |:--- | OS | Linux Ubuntu 18.04 | Hardware | Skylake with GEN9 or newer -| Software | Intel® oneAPI DPC++ Compiler (beta); Intel C++ Compiler (beta) -| What you will learn | Implement bitonic sort using Intel DPC++ compiler +| Software | Intel® oneAPI DPC++/C++ Compiler +| What you will learn | Implement bitonic sort using Intel® oneAPI DPC++/C++ Compiler | Time to complete | 15 minutes diff --git a/DirectProgramming/DPC++/GraphTraversal/bitonic-sort/sample.json b/DirectProgramming/DPC++/GraphTraversal/bitonic-sort/sample.json index c382d764e1..75efdfa0f0 100644 --- a/DirectProgramming/DPC++/GraphTraversal/bitonic-sort/sample.json +++ b/DirectProgramming/DPC++/GraphTraversal/bitonic-sort/sample.json @@ -1,7 +1,7 @@ { "guid": "4D5B57B8-6F34-4A11-89F5-3F07E766DB39", "name": "bitonic-sort", - "categories": [ "Toolkit/Intel® oneAPI Base Toolkit/oneAPI DPC++ Compiler/CPU and GPU" ], + "categories": [ "Toolkit/Intel® oneAPI Base Toolkit/Intel® oneAPI DPC++/C++ Compiler/CPU and GPU" ], "description": "Bitonic Sort using Intel® oneAPI DPC++ Language", "toolchain": [ "dpcpp" ], "targetDevice": [ "CPU", "GPU" ], diff --git a/DirectProgramming/DPC++/SpectralMethods/DiscreteCosineTransform/README.md b/DirectProgramming/DPC++/SpectralMethods/DiscreteCosineTransform/README.md index fd706c0b84..482899704b 100644 --- a/DirectProgramming/DPC++/SpectralMethods/DiscreteCosineTransform/README.md +++ b/DirectProgramming/DPC++/SpectralMethods/DiscreteCosineTransform/README.md @@ -8,7 +8,7 @@ For comprehensive instructions regarding DPC++ Programming, go to https://softwa |:--- |:--- | OS | Linux* Ubuntu* 18.04; Windows 10 | Hardware | Skylake with GEN9 or newer -| Software | Intel® oneAPI DPC++ Compiler beta; +| Software | Intel® oneAPI DPC++/C++ Compiler; | What you will learn | How to parallel process image data using DPC++ for producing a Discrete Cosine Transform | Time to complete | 15 minutes diff --git 
a/DirectProgramming/DPC++/SpectralMethods/DiscreteCosineTransform/sample.json b/DirectProgramming/DPC++/SpectralMethods/DiscreteCosineTransform/sample.json index a6ff50dad1..0f1a243409 100644 --- a/DirectProgramming/DPC++/SpectralMethods/DiscreteCosineTransform/sample.json +++ b/DirectProgramming/DPC++/SpectralMethods/DiscreteCosineTransform/sample.json @@ -1,7 +1,7 @@ { "name": "Discrete Cosine Transform", "description": "An image processing algorithm as seen in the JPEG compression standard.", - "categories": ["Toolkit/Intel® oneAPI Base Toolkit/oneAPI DPC++ Compiler/CPU and GPU"], + "categories": ["Toolkit/Intel® oneAPI Base Toolkit/Intel® oneAPI DPC++/C++ Compiler/CPU and GPU"], "os": ["linux", "windows"], "builder": ["ide", "cmake"], "languages": [{"cpp":{}}], diff --git a/DirectProgramming/DPC++/StructuredGrids/1d_HeatTransfer/README.md b/DirectProgramming/DPC++/StructuredGrids/1d_HeatTransfer/README.md index 6459b25e05..346752f830 100644 --- a/DirectProgramming/DPC++/StructuredGrids/1d_HeatTransfer/README.md +++ b/DirectProgramming/DPC++/StructuredGrids/1d_HeatTransfer/README.md @@ -12,8 +12,8 @@ and search based on relevant terms noted in the comments. |:--- |:--- | OS | Linux Ubuntu 18.04 | Hardware | Skylake with GEN9 or newer -| Software | Intel® oneAPI DPC++ Compiler (beta); Intel C++ Compiler (beta) -| What you will learn | How to simulate 1D Heat Transfer using Intel DPC++ compiler +| Software | Intel® oneAPI DPC++/C++ Compiler +| What you will learn | How to simulate 1D Heat Transfer using Intel® oneAPI DPC++/C++ Compiler | Time to complete | 10 minutes diff --git a/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/README.md b/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/README.md index 43ca9e2d4e..03b33a9171 100644 --- a/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/README.md +++ b/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/README.md @@ -17,8 +17,8 @@ and search based on relevant terms noted in the comments. 
|:--- |:--- | OS | Linux Ubuntu 18.04 | Hardware | Skylake with GEN9 or newer -| Software | Intel® oneAPI DPC++ Compiler (beta); Intel C++ Compiler (beta) -| What you will learn | How to offload the computation to GPU using Intel DPC++ compiler +| Software | Intel® oneAPI DPC++/C++ Compiler +| What you will learn | How to offload the computation to GPU using Intel® oneAPI DPC++/C++ Compiler | Time to complete | 10 minutes diff --git a/DirectProgramming/DPC++/StructuredGrids/iso3dfd_dpcpp/README.md b/DirectProgramming/DPC++/StructuredGrids/iso3dfd_dpcpp/README.md index 516f9c1ba6..67005704b9 100644 --- a/DirectProgramming/DPC++/StructuredGrids/iso3dfd_dpcpp/README.md +++ b/DirectProgramming/DPC++/StructuredGrids/iso3dfd_dpcpp/README.md @@ -8,11 +8,11 @@ For comprehensive instructions regarding DPC++ Programming, go to https://softwa |:--- |:--- | OS | Linux* Ubuntu* 18.04; Windows 10 | Hardware | Skylake with GEN9 or newer -| Software | Intel® oneAPI DPC++ Compiler beta; -| What you will learn | How to offload the computation to GPU using Intel DPC++ compiler +| Software | Intel® oneAPI DPC++/C++ Compiler; +| What you will learn | How to offload the computation to GPU using Intel® oneAPI DPC++/C++ Compiler | Time to complete | 15 minutes -Performance number tabulation [if applicable -- **NO for beta**] +Performance number tabulation | iso3dfd sample | Performance data |:--- |:--- diff --git a/DirectProgramming/DPC++/StructuredGrids/particle-diffusion/README.md b/DirectProgramming/DPC++/StructuredGrids/particle-diffusion/README.md index fc1e4910a4..e5a208706b 100644 --- a/DirectProgramming/DPC++/StructuredGrids/particle-diffusion/README.md +++ b/DirectProgramming/DPC++/StructuredGrids/particle-diffusion/README.md @@ -14,11 +14,11 @@ and search based on relevant terms noted in the comments. 
|:--- |:--- | OS | Linux Ubuntu 18.04; Windows 10 or Windows Server 2017 | Hardware | Kaby Lake with GEN9 or newer -| Software | Intel Data Parallel C++ Compiler (beta) -| What you will learn | How to offload the computation to GPU using Intel DPC++ compiler +| Software | Intel® oneAPI DPC++/C++ Compiler +| What you will learn | How to offload the computation to GPU using Intel® oneAPI DPC++/C++ Compiler | Time to complete | 15 minutes -Performance number tabulation [if applicable] +Performance number tabulation | motionsim sample | Performance data |:--- |:--- From 87b7a79066b4c6849d0ad5398f0f7ce1d2a6875c Mon Sep 17 00:00:00 2001 From: vmadananth <12753028+vmadananth@users.noreply.github.com> Date: Wed, 19 Aug 2020 16:03:21 -0700 Subject: [PATCH 05/17] Merge for removing dependency libsyl-complex.o (#85) * Adding mandelbrot sample to the repository Signed-off-by: vmadanan * Adding changes to mandelbrot to remove libsycl-complex.so dependency --- .../DPC++/CombinationalLogic/mandelbrot/License.txt | 2 +- .../CombinationalLogic/mandelbrot/mandelbrot.vcxproj | 4 +--- .../CombinationalLogic/mandelbrot/src/CMakeLists.txt | 8 ++++---- .../DPC++/CombinationalLogic/mandelbrot/src/mandel.hpp | 9 +++++++-- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/DirectProgramming/DPC++/CombinationalLogic/mandelbrot/License.txt b/DirectProgramming/DPC++/CombinationalLogic/mandelbrot/License.txt index 9cde07f558..8f608e972a 100644 --- a/DirectProgramming/DPC++/CombinationalLogic/mandelbrot/License.txt +++ b/DirectProgramming/DPC++/CombinationalLogic/mandelbrot/License.txt @@ -1,4 +1,4 @@ -Copyright Intel Corporation +Copyright 2019 Intel Corporation Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the 
Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: diff --git a/DirectProgramming/DPC++/CombinationalLogic/mandelbrot/mandelbrot.vcxproj b/DirectProgramming/DPC++/CombinationalLogic/mandelbrot/mandelbrot.vcxproj index 19bac293d5..8a4eaa9d40 100644 --- a/DirectProgramming/DPC++/CombinationalLogic/mandelbrot/mandelbrot.vcxproj +++ b/DirectProgramming/DPC++/CombinationalLogic/mandelbrot/mandelbrot.vcxproj @@ -114,7 +114,6 @@ Console true - $(ONEAPI_ROOT)\compiler\latest\windows\bin\libsycl-complex.o @@ -152,10 +151,9 @@ true true true - $(ONEAPI_ROOT)\compiler\latest\windows\bin\libsycl-complex.o - \ No newline at end of file + diff --git a/DirectProgramming/DPC++/CombinationalLogic/mandelbrot/src/CMakeLists.txt b/DirectProgramming/DPC++/CombinationalLogic/mandelbrot/src/CMakeLists.txt index 9cd8f8f64d..4c3d57303d 100644 --- a/DirectProgramming/DPC++/CombinationalLogic/mandelbrot/src/CMakeLists.txt +++ b/DirectProgramming/DPC++/CombinationalLogic/mandelbrot/src/CMakeLists.txt @@ -2,10 +2,10 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -std=c++17") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}") add_executable(mandelbrot main.cpp) -target_link_libraries(mandelbrot OpenCL sycl $ENV{ONEAPI_ROOT}/compiler/latest/linux/lib/libsycl-complex.o) -add_custom_target(run ${CMAKE_COMMAND} -E env SYCL_BE=PI_OPENCL ./mandelbrot) +target_link_libraries(mandelbrot OpenCL sycl) +add_custom_target(run ./mandelbrot) add_executable(mandelbrot_usm main.cpp) target_compile_definitions(mandelbrot_usm PRIVATE MANDELBROT_USM) -target_link_libraries(mandelbrot_usm OpenCL sycl $ENV{ONEAPI_ROOT}/compiler/latest/linux/lib/libsycl-complex.o) -add_custom_target(run_usm ${CMAKE_COMMAND} -E env SYCL_BE=PI_OPENCL ./mandelbrot_usm) +target_link_libraries(mandelbrot_usm OpenCL sycl) +add_custom_target(run_usm ./mandelbrot_usm) diff --git a/DirectProgramming/DPC++/CombinationalLogic/mandelbrot/src/mandel.hpp 
b/DirectProgramming/DPC++/CombinationalLogic/mandelbrot/src/mandel.hpp index 991478032c..7c261a5e56 100644 --- a/DirectProgramming/DPC++/CombinationalLogic/mandelbrot/src/mandel.hpp +++ b/DirectProgramming/DPC++/CombinationalLogic/mandelbrot/src/mandel.hpp @@ -33,6 +33,10 @@ struct MandelParameters { int max_iterations_; typedef std::complex ComplexF; + static std::complex complex_square( std::complex c) + { + return std::complex( c.real()*c.real() - c.imag()*c.imag(), c.real()*c.imag()*2 ); + } MandelParameters(int row_count, int col_count, int max_iterations) : row_count_(row_count), @@ -41,7 +45,7 @@ struct MandelParameters { int row_count() const { return row_count_; } int col_count() const { return col_count_; } - int max_iterations() const { return max_iterations_; } +int max_iterations() const { return max_iterations_; } // Scale from 0..row_count to -1.5..0.5 float ScaleRow(int i) const { return -1.5f + (i * (2.0f / row_count_)); } @@ -63,7 +67,8 @@ struct MandelParameters { break; } - z = z * z + c; + // z = z * z + c; + z = complex_square(z) + c; count++; } From 29936fe892b1add06990e7ca146f32c4bef47be3 Mon Sep 17 00:00:00 2001 From: ethanhirsch <67659250+ethanhirsch@users.noreply.github.com> Date: Wed, 19 Aug 2020 16:04:13 -0700 Subject: [PATCH 06/17] Correcting builder in sample.json (#93) Signed-off-by: Ethan Hirsch --- .../C++/CombinationalLogic/MandelbrotOMP/sample.json | 2 +- .../C++/CompilerInfrastructure/Intrinsics/sample.json | 2 +- DirectProgramming/C++/GraphTraversal/MergesortOMP/sample.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/DirectProgramming/C++/CombinationalLogic/MandelbrotOMP/sample.json b/DirectProgramming/C++/CombinationalLogic/MandelbrotOMP/sample.json index ece8ab4756..c5a3dd649c 100644 --- a/DirectProgramming/C++/CombinationalLogic/MandelbrotOMP/sample.json +++ b/DirectProgramming/C++/CombinationalLogic/MandelbrotOMP/sample.json @@ -3,7 +3,7 @@ "description": "Calculates the mandelbrot set and outputs a 
bmp image representation using OpenMP*", "categories": ["Toolkit/Intel® oneAPI HPC Toolkit"], "os": ["linux", "darwin"], - "builder": ["cmake"], + "builder": ["make"], "languages": [{"cpp":{}}], "toolchain": ["icc"], "guid": "DD113F58-4D91-41BB-B46E-6CF2C0D9F6F9", diff --git a/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/sample.json b/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/sample.json index 8bc2fbc314..43217b278f 100644 --- a/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/sample.json +++ b/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/sample.json @@ -3,7 +3,7 @@ "description": "Demonstrates the intrinsic functions of the Intel® C++ Compiler", "categories": ["Toolkit/Intel® oneAPI HPC Toolkit"], "os": ["linux", "darwin"], - "builder": ["cmake"], + "builder": ["make"], "languages": [{"cpp":{}}], "toolchain": ["icc"], "guid": "ACD0E89E-67CC-4CB4-87AB-B12B84962EAF", diff --git a/DirectProgramming/C++/GraphTraversal/MergesortOMP/sample.json b/DirectProgramming/C++/GraphTraversal/MergesortOMP/sample.json index cde821978e..9e89eb23d4 100644 --- a/DirectProgramming/C++/GraphTraversal/MergesortOMP/sample.json +++ b/DirectProgramming/C++/GraphTraversal/MergesortOMP/sample.json @@ -3,7 +3,7 @@ "description": "Classic sorting algorithm using OpenMP*", "categories": ["Toolkit/Intel® oneAPI HPC Toolkit"], "os": ["linux", "darwin"], - "builder": ["cmake"], + "builder": ["make"], "languages": [{"cpp":{}}], "toolchain": ["icc"], "guid": "5AFED65F-F725-411D-B21C-B59008D1166D", From a749efc7ad8f87e6b89e16bffab9764579f80e13 Mon Sep 17 00:00:00 2001 From: akertesz <67655634+akertesz@users.noreply.github.com> Date: Thu, 20 Aug 2020 10:20:28 -0400 Subject: [PATCH 07/17] Initial commit of DPC++ FPGA code samples (#92) Initial commit of 20 DPC++ FPGA code samples Signed-off-by: Audrey Kertesz --- .../ReferenceDesigns/crr/CMakeLists.txt | 11 + .../ReferenceDesigns/crr/License.txt | 7 + .../DPC++FPGA/ReferenceDesigns/crr/README.md | 224 ++ 
.../DPC++FPGA/ReferenceDesigns/crr/crr.sln | 25 + .../ReferenceDesigns/crr/crr.vcxproj | 165 ++ .../ReferenceDesigns/crr/crr.vcxproj.user | 14 + .../ReferenceDesigns/crr/sample.json | 51 + .../ReferenceDesigns/crr/src/CMakeLists.txt | 116 + .../ReferenceDesigns/crr/src/CRR_common.hpp | 149 + .../ReferenceDesigns/crr/src/build.ninja | 35 + .../crr/src/data/ordered_inputs.csv | 10 + .../ReferenceDesigns/crr/src/main.cpp | 849 ++++++ .../ReferenceDesigns/gzip/CMakeLists.txt | 11 + .../ReferenceDesigns/gzip/License.txt | 7 + .../DPC++FPGA/ReferenceDesigns/gzip/README.md | 201 ++ .../ReferenceDesigns/gzip/Zlib_License.txt | 25 + .../DPC++FPGA/ReferenceDesigns/gzip/gzip.sln | 25 + .../ReferenceDesigns/gzip/gzip.vcxproj | 174 ++ .../ReferenceDesigns/gzip/gzip.vcxproj.user | 14 + .../ReferenceDesigns/gzip/sample.json | 51 + .../ReferenceDesigns/gzip/src/CMakeLists.txt | 125 + .../ReferenceDesigns/gzip/src/CompareGzip.cpp | 85 + .../ReferenceDesigns/gzip/src/CompareGzip.hpp | 41 + .../ReferenceDesigns/gzip/src/WriteGzip.cpp | 163 ++ .../ReferenceDesigns/gzip/src/WriteGzip.hpp | 45 + .../ReferenceDesigns/gzip/src/build.ninja | 32 + .../ReferenceDesigns/gzip/src/crc32.cpp | 126 + .../ReferenceDesigns/gzip/src/crc32.hpp | 46 + .../ReferenceDesigns/gzip/src/gzip.cpp | 520 ++++ .../ReferenceDesigns/gzip/src/gzipkernel.cpp | 2406 +++++++++++++++++ .../ReferenceDesigns/gzip/src/gzipkernel.hpp | 45 + .../ReferenceDesigns/gzip/src/kernels.hpp | 148 + .../ReferenceDesigns/qrd/CMakeLists.txt | 12 + .../ReferenceDesigns/qrd/License.txt | 7 + .../DPC++FPGA/ReferenceDesigns/qrd/README.md | 239 ++ .../DPC++FPGA/ReferenceDesigns/qrd/qrd.sln | 25 + .../ReferenceDesigns/qrd/qrd.vcxproj | 170 ++ .../ReferenceDesigns/qrd/sample.json | 57 + .../ReferenceDesigns/qrd/src/CMakeLists.txt | 129 + .../ReferenceDesigns/qrd/src/build.ninja | 32 + .../ReferenceDesigns/qrd/src/qrd.cpp | 318 +++ .../ReferenceDesigns/qrd/src/qrd.hpp | 43 + .../ReferenceDesigns/qrd/src/qrd_demo.cpp | 233 ++ 
.../double_buffering/CMakeLists.txt | 11 + .../double_buffering/License.txt | 7 + .../DesignPatterns/double_buffering/README.md | 223 ++ .../double_buffering/double_buffering.sln | 25 + .../double_buffering/double_buffering.vcxproj | 160 ++ .../double_buffering/downtime.png | Bin 0 -> 11112 bytes .../double_buffering/sample.json | 51 + .../double_buffering/src/CMakeLists.txt | 89 + .../double_buffering/src/build.ninja | 30 + .../double_buffering/src/double_buffering.cpp | 349 +++ .../n_way_buffering/CMakeLists.txt | 12 + .../n_way_buffering/License.txt | 7 + .../DesignPatterns/n_way_buffering/README.md | 297 ++ .../n_way_buffering/downtime.png | Bin 0 -> 11112 bytes .../n_way_buffering/n_way_buffering.sln | 25 + .../n_way_buffering/n_way_buffering.vcxproj | 160 ++ .../n_way_buffering/sample.json | 51 + .../n_way_buffering/src/CMakeLists.txt | 93 + .../n_way_buffering/src/build.ninja | 30 + .../n_way_buffering/src/n_way_buffering.cpp | 437 +++ .../onchip_memory_cache/CMakeLists.txt | 11 + .../onchip_memory_cache/License.txt | 7 + .../onchip_memory_cache/README.md | 189 ++ .../onchip_memory_cache.sln | 25 + .../onchip_memory_cache.vcxproj | 160 ++ .../onchip_memory_cache/sample.json | 51 + .../onchip_memory_cache/src/CMakeLists.txt | 89 + .../onchip_memory_cache/src/build.ninja | 30 + .../src/onchip_memory_cache.cpp | 235 ++ .../DesignPatterns/pipe_array/CMakeLists.txt | 11 + .../DesignPatterns/pipe_array/License.txt | 7 + .../DesignPatterns/pipe_array/README.md | 215 ++ .../DesignPatterns/pipe_array/pipe_array.sln | 25 + .../pipe_array/pipe_array.vcxproj | 165 ++ .../DesignPatterns/pipe_array/sample.json | 51 + .../pipe_array/src/CMakeLists.txt | 91 + .../DesignPatterns/pipe_array/src/build.ninja | 30 + .../pipe_array/src/pipe_array.cpp | 177 ++ .../pipe_array/src/pipe_array.hpp | 33 + .../pipe_array/src/pipe_array_internal.hpp | 26 + .../pipe_array/src/unroller.hpp | 15 + .../CMakeLists.txt | 11 + .../License.txt | 7 + .../remove_loop_carried_dependency/README.md | 
176 ++ .../loop_carried_dependency.sln | 25 + .../loop_carried_dependency.vcxproj | 160 ++ .../sample.json | 51 + .../src/CMakeLists.txt | 88 + .../src/build.ninja | 30 + .../src/loop_carried_dependency.cpp | 174 ++ .../triangular_loop/CMakeLists.txt | 11 + .../triangular_loop/License.txt | 7 + .../DesignPatterns/triangular_loop/README.md | 295 ++ .../triangular_loop/sample.json | 51 + .../triangular_loop/src/CMakeLists.txt | 88 + .../triangular_loop/src/build.ninja | 30 + .../triangular_loop/src/triangular_loop.cpp | 255 ++ .../triangular_loop/triangular_loop.sln | 25 + .../triangular_loop/triangular_loop.vcxproj | 160 ++ .../kernel_args_restrict/CMakeLists.txt | 11 + .../Features/kernel_args_restrict/License.txt | 7 + .../Features/kernel_args_restrict/README.md | 182 ++ .../kernel_args_restrict.sln | 25 + .../kernel_args_restrict.vcxproj | 155 ++ .../Features/kernel_args_restrict/sample.json | 51 + .../kernel_args_restrict/src/CMakeLists.txt | 94 + .../kernel_args_restrict/src/build.ninja | 30 + .../src/kernel_args_restrict.cpp | 134 + .../Features/loop_coalesce/CMakeLists.txt | 11 + .../Features/loop_coalesce/License.txt | 7 + .../Features/loop_coalesce/README.md | 167 ++ .../Features/loop_coalesce/loop_coalesce.sln | 25 + .../loop_coalesce/loop_coalesce.vcxproj | 161 ++ .../Features/loop_coalesce/sample.json | 51 + .../Features/loop_coalesce/src/CMakeLists.txt | 88 + .../Features/loop_coalesce/src/build.ninja | 30 + .../loop_coalesce/src/loop_coalesce.cpp | 176 ++ .../Features/loop_ivdep/CMakeLists.txt | 11 + .../Tutorials/Features/loop_ivdep/License.txt | 7 + .../Tutorials/Features/loop_ivdep/README.md | 251 ++ .../Features/loop_ivdep/loop_ivdep.sln | 25 + .../Features/loop_ivdep/loop_ivdep.vcxproj | 160 ++ .../Tutorials/Features/loop_ivdep/sample.json | 51 + .../Features/loop_ivdep/src/CMakeLists.txt | 89 + .../Features/loop_ivdep/src/build.ninja | 30 + .../Features/loop_ivdep/src/loop_ivdep.cpp | 127 + .../Features/max_concurrency/CMakeLists.txt | 11 + 
.../Features/max_concurrency/License.txt | 7 + .../Features/max_concurrency/README.md | 172 ++ .../max_concurrency/max_concurrency.sln | 25 + .../max_concurrency/max_concurrency.vcxproj | 160 ++ .../Features/max_concurrency/sample.json | 51 + .../max_concurrency/src/CMakeLists.txt | 90 + .../Features/max_concurrency/src/build.ninja | 30 + .../max_concurrency/src/max_concurrency.cpp | 187 ++ .../Features/memory_attributes/CMakeLists.txt | 11 + .../Features/memory_attributes/License.txt | 7 + .../Features/memory_attributes/README.md | 277 ++ .../memory_attributes/memory_attributes.sln | 25 + .../memory_attributes.vcxproj | 160 ++ .../Features/memory_attributes/sample.json | 51 + .../memory_attributes/src/CMakeLists.txt | 96 + .../memory_attributes/src/build.ninja | 41 + .../src/memory_attributes.cpp | 227 ++ .../Tutorials/Features/pipes/CMakeLists.txt | 11 + .../Tutorials/Features/pipes/License.txt | 7 + .../Tutorials/Features/pipes/README.md | 250 ++ .../Tutorials/Features/pipes/pipes.sln | 25 + .../Tutorials/Features/pipes/pipes.vcxproj | 160 ++ .../Tutorials/Features/pipes/sample.json | 51 + .../Features/pipes/src/CMakeLists.txt | 89 + .../Tutorials/Features/pipes/src/build.ninja | 30 + .../Tutorials/Features/pipes/src/pipes.cpp | 135 + .../speculated_iterations/CMakeLists.txt | 11 + .../speculated_iterations/License.txt | 7 + .../Features/speculated_iterations/README.md | 174 ++ .../speculated_iterations/sample.json | 51 + .../speculated_iterations.sln | 25 + .../speculated_iterations.vcxproj | 161 ++ .../speculated_iterations/src/CMakeLists.txt | 97 + .../speculated_iterations/src/build.ninja | 32 + .../src/speculated_iterations.cpp | 150 + .../fast_recompile/CMakeLists.txt | 11 + .../GettingStarted/fast_recompile/License.txt | 7 + .../GettingStarted/fast_recompile/README.md | 203 ++ .../fast_recompile/device_link.png | Bin 0 -> 26416 bytes .../fast_recompile/fast_recompile.sln | 25 + .../fast_recompile/fast_recompile.vcxproj | 166 ++ 
.../fast_recompile/normal_compile.png | Bin 0 -> 16288 bytes .../GettingStarted/fast_recompile/sample.json | 35 + .../fast_recompile/src/CMakeLists.txt | 119 + .../fast_recompile/src/build.ninja | 32 + .../fast_recompile/src/kernel.cpp | 70 + .../fast_recompile/src/kernel.hpp | 16 + .../fast_recompile/src/main.cpp | 48 + .../fpga_compile/CMakeLists.txt | 13 + .../GettingStarted/fpga_compile/License.txt | 7 + .../GettingStarted/fpga_compile/README.md | 193 ++ .../fpga_compile/fpga_compile.sln | 25 + .../fpga_compile/fpga_compile.vcxproj | 160 ++ .../GettingStarted/fpga_compile/sample.json | 51 + .../fpga_compile/src/CMakeLists.txt | 89 + .../fpga_compile/src/build.ninja | 30 + .../fpga_compile/src/fpga_compile.cpp | 118 + .../Tools/system_profiling/CMakeLists.txt | 11 + .../Tools/system_profiling/License.txt | 7 + .../Tools/system_profiling/README.md | 300 ++ .../system_profiling/full_example_trace.PNG | Bin 0 -> 110050 bytes .../Tools/system_profiling/sample.json | 25 + .../Tools/system_profiling/src/CMakeLists.txt | 52 + .../system_profiling/src/double_buffering.cpp | 353 +++ .../with_and_without_double_buffering.PNG | Bin 0 -> 91631 bytes .../Tools/use_library/CMakeLists.txt | 11 + .../Tutorials/Tools/use_library/License.txt | 7 + .../Tutorials/Tools/use_library/README.md | 126 + .../Tutorials/Tools/use_library/sample.json | 34 + .../Tools/use_library/src/CMakeLists.txt | 133 + .../Tutorials/Tools/use_library/src/lib.hpp | 9 + .../Tools/use_library/src/lib_hls.cpp | 7 + .../Tools/use_library/src/lib_ocl.cl | 6 + .../Tutorials/Tools/use_library/src/lib_rtl.v | 18 + .../Tools/use_library/src/lib_rtl_model.cpp | 6 + .../Tools/use_library/src/lib_rtl_spec.xml | 25 + .../Tools/use_library/src/lib_sycl.cpp | 7 + .../Tools/use_library/src/use_library.cpp | 89 + DirectProgramming/FPGA/.gitkeep | 0 209 files changed, 20449 insertions(+) create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/CMakeLists.txt create mode 100755 
DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/License.txt create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/README.md create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/crr.sln create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/crr.vcxproj create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/crr.vcxproj.user create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/sample.json create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/src/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/src/CRR_common.hpp create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/src/build.ninja create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/src/data/ordered_inputs.csv create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/src/main.cpp create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/License.txt create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/README.md create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/Zlib_License.txt create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/gzip.sln create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/gzip.vcxproj create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/gzip.vcxproj.user create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/sample.json create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/CompareGzip.cpp create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/CompareGzip.hpp create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/WriteGzip.cpp create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/WriteGzip.hpp create 
mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/build.ninja create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/crc32.cpp create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/crc32.hpp create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/gzip.cpp create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/gzipkernel.cpp create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/gzipkernel.hpp create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/kernels.hpp create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/License.txt create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/README.md create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/qrd.sln create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/qrd.vcxproj create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/sample.json create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/src/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/src/build.ninja create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/src/qrd.cpp create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/src/qrd.hpp create mode 100755 DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/src/qrd_demo.cpp create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/License.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/README.md create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/double_buffering.sln create mode 100755 
DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/double_buffering.vcxproj create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/downtime.png create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/sample.json create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/src/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/src/build.ninja create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/src/double_buffering.cpp create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/License.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/README.md create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/downtime.png create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/n_way_buffering.sln create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/n_way_buffering.vcxproj create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/sample.json create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/src/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/src/build.ninja create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/src/n_way_buffering.cpp create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/License.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/README.md create mode 
100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/onchip_memory_cache.sln create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/onchip_memory_cache.vcxproj create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/sample.json create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/src/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/src/build.ninja create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/src/onchip_memory_cache.cpp create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/License.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/README.md create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/pipe_array.sln create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/pipe_array.vcxproj create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/sample.json create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/src/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/src/build.ninja create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/src/pipe_array.cpp create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/src/pipe_array.hpp create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/src/pipe_array_internal.hpp create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/src/unroller.hpp create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/CMakeLists.txt create mode 
100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/License.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/README.md create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/loop_carried_dependency.sln create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/loop_carried_dependency.vcxproj create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/sample.json create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/src/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/src/build.ninja create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/src/loop_carried_dependency.cpp create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/License.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/README.md create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/sample.json create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/src/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/src/build.ninja create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/src/triangular_loop.cpp create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/triangular_loop.sln create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/triangular_loop.vcxproj create mode 100755 
DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/License.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/README.md create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/kernel_args_restrict.sln create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/kernel_args_restrict.vcxproj create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/sample.json create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/src/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/src/build.ninja create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/src/kernel_args_restrict.cpp create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/License.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/README.md create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/loop_coalesce.sln create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/loop_coalesce.vcxproj create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/sample.json create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/src/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/src/build.ninja create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/src/loop_coalesce.cpp create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/License.txt create mode 100755 
DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/README.md create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/loop_ivdep.sln create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/loop_ivdep.vcxproj create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/sample.json create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/src/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/src/build.ninja create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/src/loop_ivdep.cpp create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/License.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/README.md create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/max_concurrency.sln create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/max_concurrency.vcxproj create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/sample.json create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/src/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/src/build.ninja create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/src/max_concurrency.cpp create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/License.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/README.md create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/memory_attributes.sln create mode 100755 
DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/memory_attributes.vcxproj create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/sample.json create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/src/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/src/build.ninja create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/src/memory_attributes.cpp create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/License.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/README.md create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/pipes.sln create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/pipes.vcxproj create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/sample.json create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/src/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/src/build.ninja create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/src/pipes.cpp create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/License.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/README.md create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/sample.json create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/speculated_iterations.sln create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/speculated_iterations.vcxproj create mode 100755 
DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/src/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/src/build.ninja create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/src/speculated_iterations.cpp create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/License.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/README.md create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/device_link.png create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/fast_recompile.sln create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/fast_recompile.vcxproj create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/normal_compile.png create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/sample.json create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/src/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/src/build.ninja create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/src/kernel.cpp create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/src/kernel.hpp create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/src/main.cpp create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/License.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/README.md create mode 100755 
DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/fpga_compile.sln create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/fpga_compile.vcxproj create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/sample.json create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/src/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/src/build.ninja create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/src/fpga_compile.cpp create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Tools/system_profiling/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Tools/system_profiling/License.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Tools/system_profiling/README.md create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Tools/system_profiling/full_example_trace.PNG create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Tools/system_profiling/sample.json create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Tools/system_profiling/src/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Tools/system_profiling/src/double_buffering.cpp create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Tools/system_profiling/with_and_without_double_buffering.PNG create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/License.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/README.md create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/sample.json create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/lib.hpp create mode 100755 
DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/lib_hls.cpp create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/lib_ocl.cl create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/lib_rtl.v create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/lib_rtl_model.cpp create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/lib_rtl_spec.xml create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/lib_sycl.cpp create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/use_library.cpp delete mode 100644 DirectProgramming/FPGA/.gitkeep diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/CMakeLists.txt b/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/CMakeLists.txt new file mode 100755 index 0000000000..6ae6386d49 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/CMakeLists.txt @@ -0,0 +1,11 @@ +set(CMAKE_CXX_COMPILER "dpcpp") + +cmake_minimum_required (VERSION 2.8) + +project(CRR) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +add_subdirectory (src) diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/License.txt b/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/License.txt new file mode 100755 index 0000000000..e63c6e13dc --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/License.txt @@ -0,0 +1,7 @@ +Copyright Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the 
following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/README.md b/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/README.md new file mode 100755 index 0000000000..ab98bae8d7 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/README.md @@ -0,0 +1,224 @@ +# CRR Binomial Tree Model for Option Pricing +An FPGA-optimized reference design computing the Cox-Ross-Rubinstein (CRR) binomial tree model with Greeks for American exercise options. + +The [FPGA Optimization Guide](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide) provides comprehensive instructions for targeting FPGAs through DPC++. The [oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide) is a resource for general target-independent DPC++ programming. Additional reference material specific to option pricing algorithms is provided in the References section of this README. + +| Optimized for | Description +--- |--- +| OS | Linux* Ubuntu* 18.04; Windows* 10 +| Hardware | Intel® Programmable Acceleration Card (PAC) with Intel Arria® 10 GX FPGA;
Intel® Programmable Acceleration Card (PAC) with Intel Stratix® 10 SX FPGA +| Software | Intel® oneAPI DPC++ Compiler (Beta)
Intel® FPGA Add-On for oneAPI Base Toolkit +| What you will learn | Review a high performance DPC++ design optimized for FPGA +| Time to complete | 1 hr (not including compile time) + +_Notice: Limited support in Windows*; compiling for FPGA hardware is not supported in Windows*_ + + +**Performance** +Please refer to performance disclaimer at the end of this README. + +| Device | Throughput +|:--- |:--- +| Intel® PAC with Intel Arria® 10 GX FPGA | 118 assets/s +| Intel® PAC with Intel Stratix® 10 SX FPGA | 243 assets/s + + +## Purpose +This sample implements the Cox-Ross-Rubinstein (CRR) binomial tree model that is used in the finance field for American exercise options with five Greeks (delta, gamma, theta, vega and rho). The simple idea is to model all possible assets price paths using a binomial tree. + +## Key Implementation Details + +### Design Inputs +This design reads inputs from the `ordered_inputs.csv` file. The inputs are: + +| Input | Description +--- |--- +| `n_steps` | Number of time steps in the binomial tree. The maximum `n_steps` in this design is 8189. +| `cp` | -1 or 1 represents put and call options, respectively. +| `spot` | Spot price of the underlying price. +| `fwd` | Forward price of the underlying price. +| `strike` | Exercise price of the option. +| `vol` | Percent volatility that the design reads as a decimal value. +| `df` | Discount factor to option expiry. +| `t` | Time, in years, to the maturity of the option. + +### Design Outputs +This design writes outputs to the `ordered_outputs.csv` file. The outputs are: + +| Output | Description +--- |--- +| `value` | Option price +| `delta` | Measures the rate of change of the theoretical option value with respect to changes in the underlying asset's price. +| `gamma` | Measures the rate of change in the `delta` with respect to changes in the underlying price. +| `vega` | Measures sensitivity to volatility. 
+| `theta` | Measures the sensitivity of the value of the derivative to the passage of time. +| `rho` | Measures sensitivity to the interest rate. + +### Design Correctness +This design tests the correctness of the optimized FPGA code by comparing its output to a golden result computed on the CPU. + +### Design Performance +This design measures the FPGA performance to determine how many assets can be processed per second. + +## License +This code sample is licensed under MIT license. + +## Building the CRR Program + +### Include Files +The included header `dpc_common.hpp` is located at `%ONEAPI_ROOT%\dev-utilities\latest\include` on your development system. + +### Running Samples in DevCloud +If running a sample in the Intel DevCloud, remember that you must specify the compute node (FPGA) as well as whether to run in batch or interactive mode. For more information see the Intel® oneAPI Base Toolkit Get Started Guide ([https://devcloud.intel.com/oneapi/get-started/base-toolkit/](https://devcloud.intel.com/oneapi/get-started/base-toolkit/)). + +When compiling for FPGA hardware, it is recommended to increase the job timeout to 48h. + +### On a Linux* System + +1. Generate the `Makefile` by running `cmake`. + ``` + mkdir build + cd build + ``` + To compile for the Intel® PAC with Intel Arria® 10 GX FPGA, run `cmake` using the command: + ``` + cmake .. + ``` + Alternatively, to compile for the Intel® PAC with Intel Stratix® 10 SX FPGA, run `cmake` using the command: + + ``` + cmake .. -DFPGA_BOARD=intel_s10sx_pac:pac_s10 + ``` + +2. Compile the design through the generated `Makefile`. The following build targets are provided, matching the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + make fpga_emu + ``` + * Generate the optimization report: + ``` + make report + ``` + * Compile for FPGA hardware (longer compile time, targets FPGA device): + ``` + make fpga + ``` +3. 
(Optional) As the above hardware compile may take several hours to complete, an Intel® PAC with Intel Arria® 10 GX FPGA precompiled binary can be downloaded here. + +### On a Windows* System +Note: `cmake` is not yet supported on Windows. A build.ninja file is provided instead. + +1. Enter the source file directory. + ``` + cd src + ``` + +2. Compile the design. The following build targets are provided, matching the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + ninja fpga_emu + ``` + + * Generate the optimization report: + + ``` + ninja report + ``` + If you are targeting Intel® PAC with Intel Stratix® 10 SX FPGA, instead use: + ``` + ninja report_s10_pac + ``` + * Compiling for FPGA hardware is not yet supported on Windows. + + ### In Third-Party Integrated Development Environments (IDEs) + +You can compile and run this tutorial in the Eclipse* IDE (in Linux*) and the Visual Studio* IDE (in Windows*). For instructions, refer to the following link: [Intel® oneAPI DPC++ FPGA Workflows on Third-Party IDEs](https://software.intel.com/en-us/articles/intel-oneapi-dpcpp-fpga-workflow-on-ide) + +## Running the Reference Design + + 1. Run the sample on the FPGA emulator (the kernel executes on the CPU): + ``` + ./crr.fpga_emu <input_file> [-o=<output_file>] (Linux) + + crr.fpga_emu.exe <input_file> [-o=<output_file>] (Windows) + ``` + 2. Run the sample on the FPGA device: + ``` + ./crr.fpga <input_file> [-o=<output_file>] (Linux) + ``` + +### Application Parameters + +| Argument | Description +--- |--- +| `<input_file>` | Optional argument that provides the input data. The default file is `/data/ordered_inputs.csv` +| `-o=<output_file>` | Optional argument that specifies the name of the output file. The default name of the output file is `ordered_outputs.csv`. + +### Example of Output +``` +============ Correctness Test ============= +Running analytical correctness checks... 
+CPU-FPGA Equivalence: PASS + +============ Throughput Test ============= +Avg throughput: 66.2 assets/s +``` + +## Additional Design Information + +### Source Code Explanation + +| File | Description +--- |--- +| `main.cpp` | Contains both host code and SYCL* kernel code. +| `CRR_common.hpp` | Header file for `main.cpp`. Contains the data structures needed for both host code and SYCL* kernel code. + + + +### Backend Compiler Flags Used + +| Flag | Description +--- |--- +`-Xshardware` | Target FPGA hardware (as opposed to FPGA emulator) +`-Xsdaz` | Denormals are zero +`-Xsrounding=faithful` | Rounds results to either the upper or lower nearest single-precision numbers +`-Xsparallel=2` | Uses 2 cores when compiling the bitstream through Quartus +`-Xsseed=2` | Uses seed 2 during Quartus, yields slightly higher fMAX + +### Preprocessor Define Flags + +| Flag | Description +--- |--- +`-DOUTER_UNROLL=1` | Uses the value 1 for the constant OUTER_UNROLL, controls the number of CRRs that can be processed in parallel +`-DINNER_UNROLL=64` | Uses the value 64 for the constant INNER_UNROLL, controls the degree of parallelization within the calculation of 1 CRR +`-DOUTER_UNROLL_POW2=1` | Uses the value 1 for the constant OUTER_UNROLL_POW2, controls the number of memory banks + + +NOTE: The Xsseed, DOUTER_UNROLL, DINNER_UNROLL and DOUTER_UNROLL_POW2 values differ depending on the board being targeted. More information about the unroll factors can be found in `/src/CRR_common.hpp`. + +### Performance disclaimers + +Tests document performance of components on a particular test, in specific systems. Differences in hardware, software, or configuration will affect actual performance. Consult other sources of information to evaluate performance as you consider your purchase. For more complete information about performance and benchmark results, visit [www.intel.com/benchmarks](https://www.intel.com/benchmarks). 
+ +Performance results are based on testing as of July 20, 2020 and may not reflect all publicly available security updates. See configuration disclosure for details. No product or component can be absolutely secure. + +Intel technologies’ features and benefits depend on system configuration and may require enabled hardware, software or service activation. Performance varies depending on system configuration. Check with your system manufacturer or retailer or learn more at [intel.com](https://www.intel.com). + +The performance was measured by Intel on July 20, 2020. + +Intel and the Intel logo are trademarks of Intel Corporation or its subsidiaries in the U.S. and/or other countries. + +(C) Intel Corporation. + +### References + +[Khronos SYCL Resources](https://www.khronos.org/sycl/resources) + +[Binomial options pricing model](https://en.wikipedia.org/wiki/Binomial_options_pricing_model) + +[Wiki page for finance Greeks](https://en.wikipedia.org/wiki/Greeks_(finance)) + +[OpenCL Intercept Layer](https://github.com/intel/opencl-intercept-layer) + diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/crr.sln b/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/crr.sln new file mode 100755 index 0000000000..a95fce9c30 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/crr.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.28307.705 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "crr", "crr.vcxproj", "{8EB512FF-4487-4FEC-9B88-8C0DA734B1B2}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {8EB512FF-4487-4FEC-9B88-8C0DA734B1B2}.Debug|x64.ActiveCfg = Debug|x64 + {8EB512FF-4487-4FEC-9B88-8C0DA734B1B2}.Debug|x64.Build.0 = Debug|x64 + 
{8EB512FF-4487-4FEC-9B88-8C0DA734B1B2}.Release|x64.ActiveCfg = Release|x64 + {8EB512FF-4487-4FEC-9B88-8C0DA734B1B2}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {6887ACDD-3E54-4396-A921-99C630333932} + EndGlobalSection +EndGlobal diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/crr.vcxproj b/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/crr.vcxproj new file mode 100755 index 0000000000..62a523e96c --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/crr.vcxproj @@ -0,0 +1,165 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + + + + + + + + + + 15.0 + {8eb512ff-4487-4fec-9b88-8c0da734b1b2} + Win32Proj + crr + $(WindowsSDKVersion.Replace("\","")) + + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + + + + + + + + + + + + + + + + + + + true + + + true + + + false + + + false + + + + Use + Level3 + Disabled + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + Disabled + true + true + pch.h + true + -DFPGA_EMULATOR -DOUTER_UNROLL=1 -DINNER_UNROLL=64 -DOUTER_UNROLL_POW2=1 %(AdditionalOptions) + false + $(IntDir)crr.obj + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + true + -DFPGA_EMULATOR -DOUTER_UNROLL=1 -DINNER_UNROLL=64 -DOUTER_UNROLL_POW2=1 %(AdditionalOptions) + $(IntDir)crr.obj + $(ONEAPI_ROOT)dev-utilities\latest\include + + + 
Console + true + true + true + + + + + + \ No newline at end of file diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/crr.vcxproj.user b/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/crr.vcxproj.user new file mode 100755 index 0000000000..9115b3f275 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/crr.vcxproj.user @@ -0,0 +1,14 @@ + + + + false + + + ./src/data/ordered_inputs.csv -o=./src/data/ordered_outputs.csv + WindowsLocalDebugger + + + ./src/data/ordered_inputs.csv -o=./src/data/ordered_outputs.csv + WindowsLocalDebugger + + \ No newline at end of file diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/sample.json b/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/sample.json new file mode 100755 index 0000000000..6155ce223d --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/sample.json @@ -0,0 +1,51 @@ +{ + "guid": "D725E06E-0ECE-44F8-910D-AD1A8C89ED89", + "name": "CRR Binomial Tree Model for Option Pricing", + "categories": ["Toolkit/Intel® oneAPI Base Toolkit/FPGA/Reference Designs"], + "description": "FPGA-optimized reference design of the Cox-Ross-Rubinstein (CRR) binomial tree model with Greeks for American exercise options", + "toolchain": ["dpcpp"], + "os": ["linux", "windows"], + "builder": ["ide", "cmake"], + "targetDevice": ["FPGA"], + "languages": [{"cpp":{}}], + "ciTests": { + "linux": [ + { + "id": "fpga_emu", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make fpga_emu", + "./crr.fpga_emu ./src/data/ordered_inputs.csv -o=./src/data/ordered_outputs.csv" + ] + }, + { + "id": "report", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make report" + ] + } + ], + "windows": [ + { + "id": "fpga_emu", + "steps": [ + "cd src", + "ninja fpga_emu", + "crr.fpga_emu.exe ./data/ordered_inputs.csv -o=./data/ordered_outputs.csv" + ] + }, + { + "id": "report", + "steps": [ + "cd src", + "ninja report" + ] + } + ] + } +} diff --git 
a/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/src/CMakeLists.txt b/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/src/CMakeLists.txt new file mode 100755 index 0000000000..8c56a699ad --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/src/CMakeLists.txt @@ -0,0 +1,116 @@ +set(SOURCE_FILE main.cpp) +set(TARGET_NAME crr) +set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu) +set(FPGA_TARGET ${TARGET_NAME}.fpga) +set(REPORTS_TARGET ${TARGET_NAME}_report) + +# Intel supported FPGA Boards and their names +set(A10_PAC_BOARD_NAME "intel_a10gx_pac:pac_a10") +set(S10_PAC_BOARD_NAME "intel_s10sx_pac:pac_s10") + +# Design specific constant values +set(OUTER_UNROLL_A10 1) +set(INNER_UNROLL_A10 64) +set(OUTER_UNROLL_POW2_A10 1) +set(OUTER_UNROLL_S10 2) +set(INNER_UNROLL_S10 64) +set(OUTER_UNROLL_POW2_S10 2) +set(SEED_A10 1) +set(SEED_S10 2) + +# Assume target is the Intel(R) PAC with Intel Arria(R) 10 GX FPGA +SET(_FPGA_BOARD ${A10_PAC_BOARD_NAME}) +SET(OUTER_UNROLL ${OUTER_UNROLL_A10}) +SET(INNER_UNROLL ${INNER_UNROLL_A10}) +SET(OUTER_UNROLL_POW2 ${OUTER_UNROLL_POW2_A10}) +SET(SEED ${SEED_A10}) + +# Check if target is the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA +IF (NOT DEFINED FPGA_BOARD) + MESSAGE(STATUS "\tFPGA_BOARD was not specified. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. 
Please refer to the README for more information on how to run the design on the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${A10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${S10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Stratix(R) 10 SX FPGA.") + SET(_FPGA_BOARD ${S10_PAC_BOARD_NAME}) + SET(OUTER_UNROLL ${OUTER_UNROLL_S10}) + SET(INNER_UNROLL ${INNER_UNROLL_S10}) + SET(OUTER_UNROLL_POW2 ${OUTER_UNROLL_POW2_S10}) + SET(SEED ${SEED_S10}) + +ELSE() + MESSAGE(STATUS "\tAn invalid board name was passed in using the FPGA_BOARD flag. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. Please refer to the README for the list of valid board names.") +ENDIF() + +set(HARDWARE_COMPILE_FLAGS -fintelfpga -c -DOUTER_UNROLL=${OUTER_UNROLL} -DINNER_UNROLL=${INNER_UNROLL} -DOUTER_UNROLL_POW2=${OUTER_UNROLL_POW2}) + +# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation +separate_arguments(USER_HARDWARE_FLAGS) +set(HARDWARE_LINK_FLAGS -fintelfpga -Xshardware -Xsdaz -Xsrounding=faithful -Xsparallel=2 -Xsseed=${SEED} -Xsboard=${_FPGA_BOARD} ${USER_HARDWARE_FLAGS} -DOUTER_UNROLL=${OUTER_UNROLL} -DINNER_UNROLL=${INNER_UNROLL} -DOUTER_UNROLL_POW2=${OUTER_UNROLL_POW2}) +set(FINAL_LINK_FLAGS -fintelfpga -DOUTER_UNROLL=${OUTER_UNROLL} -DINNER_UNROLL=${INNER_UNROLL} -DOUTER_UNROLL_POW2=${OUTER_UNROLL_POW2}) + +set(EMULATOR_COMPILE_FLAGS "-fintelfpga -DFPGA_EMULATOR -DOUTER_UNROLL=${OUTER_UNROLL} -DINNER_UNROLL=${INNER_UNROLL} -DOUTER_UNROLL_POW2=${OUTER_UNROLL_POW2}") +set(EMULATOR_LINK_FLAGS "-fintelfpga") + +#copy input data +configure_file("data/ordered_inputs.csv" "data/ordered_inputs.csv" COPYONLY) + +# fpga emulator +if(WIN32) + 
set(WIN_EMULATOR_TARGET ${EMULATOR_TARGET}.exe) + add_custom_target(fpga_emu DEPENDS ${WIN_EMULATOR_TARGET}) + separate_arguments(WIN_EMULATOR_COMPILE_FLAGS WINDOWS_COMMAND "${EMULATOR_COMPILE_FLAGS}") + add_custom_command(OUTPUT ${WIN_EMULATOR_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} ${WIN_EMULATOR_COMPILE_FLAGS} /GX ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${WIN_EMULATOR_TARGET} + DEPENDS ${SOURCE_FILE}) +else() + add_executable(${EMULATOR_TARGET} ${SOURCE_FILE}) + add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS ${EMULATOR_COMPILE_FLAGS}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS ${EMULATOR_LINK_FLAGS}) +endif() + +# fpgas +if(WIN32) + add_custom_target(fpga + COMMAND echo "FPGA hardware flow is not supported in Windows") +else() + add_custom_target(fpga DEPENDS ${FPGA_TARGET}) + set(DEVICE_FPGA_OBJ "crr_fpga.o") + + add_custom_command(OUTPUT ${DEVICE_FPGA_OBJ} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${HARDWARE_COMPILE_FLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} -o ${DEVICE_FPGA_OBJ} + DEPENDS ${SOURCE_FILE}) + + add_custom_command(OUTPUT ${FPGA_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS} ${DEVICE_FPGA_OBJ} -o ${CMAKE_BINARY_DIR}/${FPGA_TARGET} + DEPENDS ${DEVICE_FPGA_OBJ}) +endif() + +# fpga report +if(WIN32) + add_custom_target(report DEPENDS ${REPORTS_TARGET} ) + + separate_arguments(WIN_FLAGS WINDOWS_COMMAND) + add_custom_command(OUTPUT ${REPORTS_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} /EHsc ${CMAKE_CXX_FLAGS} ${WIN_FLAGS} ${HARDWARE_LINK_FLAGS} -fsycl-link ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${REPORTS_TARGET} + DEPENDS ${SOURCE_FILE}) + +else() + add_custom_target(report DEPENDS ${REPORTS_TARGET} ) + + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} ${SOURCE_FILE} COPYONLY) + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/CRR_common.hpp 
CRR_common.hpp COPYONLY) + + add_custom_command(OUTPUT ${REPORTS_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS} -fsycl-link ${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${REPORTS_TARGET} + DEPENDS ${SOURCE_FILE} CRR_common.hpp) +endif() + +# run +add_custom_target(run + COMMAND ../${TARGET_NAME}.fpga_emu data/ordered_inputs.csv -o=data/ordered_output.csv + DEPENDS ${TARGET_NAME}.fpga_emu) + diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/src/CRR_common.hpp b/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/src/CRR_common.hpp new file mode 100755 index 0000000000..6f2537e1e0 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/src/CRR_common.hpp @@ -0,0 +1,149 @@ +// ============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// +// This agreement shall be governed in all respects by the laws of the State of +// California and by the laws of the United States of America. + +#ifndef __CRR_COMMON_H__ +#define __CRR_COMMON_H__ + +constexpr int kMaxStringLen = 1024; + +// Increments of kMaxNSteps +constexpr size_t kMaxNSteps = 8189; +constexpr size_t kMaxNSteps1 = 8190; +constexpr size_t kMaxNSteps2 = 8191; +constexpr size_t kMaxNSteps3 = 8192; + +// Increment by a small epsilon in order to compute derivative +// of option price with respect to Vol or Interest. The derivatives +// are then used to compute Vega and Rho. +constexpr double kEpsilon = 0.0001; + +// Whenever calculations are made for Option Price 0, need to increment +// nsteps by 2 to ensure all the required derivative prices are calculated. +constexpr size_t kOpt0 = 2; + + +// Solver configuration settings that are dependent on selected +// board. Most notable settings are: + +// OUTER_UNROLL controls the number of CRRs that can be processed +// in parallel in a SIMD fashion (number of CRRS must be >= OUTER_UNROLL). +// This is ideally a power of two, but does not have to be. Since +// the DRAM bandwidth requirement is low, increasing OUTER_UNROLL +// should result in fairly linear speedup. (max: 32 on PAC A10) + +// INNER_UNROLL controls the degree of parallelization within +// the calculation of a single CRR. This must be a power of two. Increasing +// INNER_UNROLL has a lower area overhead than increasing OUTER_UNROLL; +// however, there are diminishing returns as INNER_UNROLL is increased with +// respect to the number of time steps. (max: 128 on PAC A10) + + +// Data structure for original input data. 
+typedef struct { + int cp; /* cp = -1 or 1 for Put & Call respectively. */ + double n_steps; /* n_steps = number of time steps in the binomial tree. */ + double strike; /* strike = exercise price of option. */ + double spot; /* spot = spot price of the underlying. */ + double fwd; /* fwd = forward price of the underlying. */ + double vol; /* vol = per cent volatility, input as a decimal. */ + double df; /* df = discount factor to option expiry. */ + double t; /* t = time in years to the maturity of the option. */ + +} InputData; + +// Data structure as the inputs to FPGA. +// Element[i] is used to compute option_price[i]. +typedef struct { + double n_steps; /* n_steps = number of time steps in the binomial tree. */ + double u[3]; /* u = the increase factor of an up movement in the binomial tree, + same for each time step. */ + double u2[3]; /* u2 = the square of increase factor. */ + double c1[3]; /* c1 = the probability of a down movement in the binomial tree, + same for each time step. */ + double c2[3]; /* c2 = the probability of an up movement in the binomial tree. */ + double umin[3]; /* umin = minimum price of the underlying at the maturity. */ + double param_1[3];/* param_1[i] = cp * umin[i] */ + double param_2; /* param_2 = cp * strike */ + +} CRRInParams; + +// Data structure as the output from ProcessKernelResult(). +typedef struct { + double pgreek[4]; /* Stores the 4 derivative prices in the binomial tree + required to compute the Premium and Greeks. */ + double vals[3]; /* Three option prices calculated */ + +} InterRes; + +// Data structure for option price and five Greeks. +typedef struct { + double value; /* value = option price.
*/ + double delta; + double gamma; + double vega; + double theta; + double rho; +} OutputRes; + +// Data structures required by the kernel +typedef struct { + double u; + double c1; + double c2; + double param_1; + double param_2; + short n_steps; + short pad1; + int pad2; + double pad3; + double pad4; +} CRRMeta; + +typedef struct { + double u2; + double p1powu; + double init_optval; + double pad; +} ArrayEle; + +typedef struct { + ArrayEle array_eles[kMaxNSteps3][3]; /* Second dimension size set to 3 to have a + separate ArrayEle for each option price */ +} CRRArrayEles; + +typedef struct { + ArrayEle array_eles[kMaxNSteps3]; +} CRRPerStepMeta; + +typedef struct { + double pgreek[4]; + double optval0; + double pad[3]; +} CRRResParams; + +#endif diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/src/build.ninja b/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/src/build.ninja new file mode 100755 index 0000000000..58af917f67 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/src/build.ninja @@ -0,0 +1,35 @@ +source_file = main.cpp +target_name = crr + +emulator_target = ${target_name}.fpga_emu.exe +report_target = ${target_name}_report.a +report_target_s10_pac = ${target_name}_s10_pac_report.a + +hardware_flags = -fintelfpga -Xshardware -Xsfpc -Xsparallel=2 -Xsseed=5 +emulator_flags = -fintelfpga -DFPGA_EMULATOR +a10_flags = -DOUTER_UNROLL=1 -DINNER_UNROLL=64 -DOUTER_UNROLL_POW2=1 +s10_flags = -DOUTER_UNROLL=2 -DINNER_UNROLL=64 -DOUTER_UNROLL_POW2=2 + +rule build_fpga_emu + command = dpcpp /GX ${emulator_flags} ${a10_flags} $in -o $out + +rule build_fpga_emu_s10 + command = dpcpp /GX ${emulator_flags} -Xsboard=intel_s10sx_pac:pac_s10 ${s10_flags} $in -o $out + +rule gen_report + command = dpcpp /GX ${hardware_flags} -Xsboard=intel_a10gx_pac:pac_a10 ${a10_flags} -fsycl-link $in -o $out + +rule gen_report_s10_pac + command = dpcpp /GX ${hardware_flags} -Xsboard=intel_s10sx_pac:pac_s10 ${s10_flags} -fsycl-link $in -o $out + +# FPGA 
emulator +build fpga_emu: phony ${emulator_target} +build ${emulator_target}: build_fpga_emu ${source_file} + +# report +build report: phony ${report_target} +build ${report_target}: gen_report ${source_file} + +# report (S10 PAC) +build report_s10_pac: phony ${report_target_s10_pac} +build ${report_target_s10_pac}: gen_report_s10_pac ${source_file} diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/src/data/ordered_inputs.csv b/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/src/data/ordered_inputs.csv new file mode 100755 index 0000000000..3a28083fa2 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/src/data/ordered_inputs.csv @@ -0,0 +1,10 @@ +8189,-1,37.5,37.50112053,85,0.4,0.99997012,0.011952191 +8189,1,37.5,37.50112053,85,0.4,0.99997012,0.011952191 +8189,-1,270,270.0080678,65,0.18,0.999940241,0.011952191 +8189,1,270,270.0080678,65,0.18,0.999940241,0.011952191 +8189,-1,292.5,292.5087402,70,0.35,0.999940241,0.011952191 +8189,1,292.5,292.5087402,70,0.35,0.999940241,0.011952191 +8189,-1,122.5,122.5109816,40,0.2,0.999910363,0.011952191 +8189,1,122.5,122.5109816,40,0.2,0.999910363,0.011952191 +8189,-1,22.5,22.50067232,55,0.3,0.999910363,0.011952191 +8189,1,22.5,22.50067232,55,0.3,0.999910363,0.011952191 diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/src/main.cpp b/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/src/main.cpp new file mode 100755 index 0000000000..7c92610e19 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/crr/src/main.cpp @@ -0,0 +1,849 @@ +// ============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to 
use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// +// This agreement shall be governed in all respects by the laws of the State of +// California and by the laws of the United States of America. + +//////////////////////////////////////////////////////////////////////////////// +// +// CRRSolver CPU/FPGA Accelerator Demo Program +// +//////////////////////////////////////////////////////////////////////////////// +// +// This design implements a simple Cox-Ross-Rubinstein (CRR) binomial tree model +// with Greeks for American exercise options. +// +// +// Optimization summary: +// -- Area-consuming but infrequent calculation is done on CPU. +// -- Parallelize the calculation of a single CRR. +// -- Run multiple independent CRRs in parallel. +// -- Optimized memory configurations to reduce the need for replication +// and to eliminate the need for double-pumping M20Ks. +// +// The following diagram shows the mechanism of optimizations to CRR.
+// +// +// +------+ ^ +// +------------>|optval| | +// | | [2] | | +// | +------+ | +// | | +// | | +// +--+---+ | +// +------------>|optval| | +// | | [1] | | +// | +--+---+ | +// | | | +// | | | +// | | | Loop4(L4) +// | | | updates +// +---+--+ +------------>+------+ | multiple +// |optval| |optval| | elements +// | [0] | | [1] | | in optval[] +// +---+--+ +------------>+------+ | simultaneously +// | | | +// | | | +// | | | +// | | | +// | +--+---+ | +// | |optval| | +// +------------>| [0] | | +// +--+---+ | +// | | +// | | +// | +------+ | +// | |optval| | +// +------------>| [0] | | +// +------+ + +// +// +// +// +// step 1 step 2 +// +// +// <------------------------------------------+ +// Loop3(L3) updates each level of the tree +// +// + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "CRR_common.hpp" +#include "dpc_common.hpp" + +using namespace std; +using namespace sycl; + +class CRRSolver; +double CrrSolver(const int n_items, vector &in_params, + vector &res_params, + vector &in_params2, queue &q) { + dpc_common::TimeInterval timer; + + constexpr int steps = kMaxNSteps2; + + const int n_crr = + (((n_items + (OUTER_UNROLL - 1)) / OUTER_UNROLL) * OUTER_UNROLL) * 3; + + { + buffer i_params(in_params.data(), in_params.size()); + buffer r_params(res_params.data(), res_params.size()); + buffer a_params(in_params2.data(), in_params2.size()); + + event e; + { + e = q.submit([&](handler &h) { + auto accessor_v = + i_params.template get_access(h); + + auto accessor_v2 = + a_params.template get_access(h); + + auto accessor_r = + r_params.template get_access(h); + + h.single_task([=]() [[intel::kernel_args_restrict]] { + // Kernel requires n_crr to be a multiple of OUTER_UNROLL. + // This is taken care of by the host. + const int n_crr_div = n_crr / OUTER_UNROLL; + + // Outerloop counter. 
Use while-loop for better timing-closure + // characteristics because it tells the compiler the loop body will + // never be skipped. + int oc = 0; + do { + // Metadata of CRR problems + [[intelfpga::register]] double u[OUTER_UNROLL]; + [[intelfpga::register]] double c1[OUTER_UNROLL]; + [[intelfpga::register]] double c2[OUTER_UNROLL]; + [[intelfpga::register]] double param_1[OUTER_UNROLL]; + [[intelfpga::register]] double param_2[OUTER_UNROLL]; + [[intelfpga::register]] short n_steps[OUTER_UNROLL]; + + // Current values in binomial tree. We only need to keep track of + // one level worth of data, not the entire tree. + [[intelfpga::memory, intelfpga::singlepump, + intelfpga::bankwidth(sizeof(double)), + intelfpga::numbanks(INNER_UNROLL * OUTER_UNROLL_POW2), + intelfpga::private_copies( + 8)]] double optval[kMaxNSteps3][OUTER_UNROLL_POW2]; + + // Initial values in binomial tree, which correspond to the last + // level of the binomial tree. + [[intelfpga::memory, intelfpga::singlepump, + intelfpga::bankwidth(sizeof(double)), + intelfpga::numbanks(INNER_UNROLL * OUTER_UNROLL_POW2), + intelfpga::private_copies( + 8)]] double init_optval[kMaxNSteps3][OUTER_UNROLL_POW2]; + + // u2_array precalculates the power function of u2. + [[intelfpga::memory, intelfpga::singlepump, + intelfpga::bankwidth(sizeof(double)), + intelfpga::numbanks(INNER_UNROLL * OUTER_UNROLL_POW2), + intelfpga::private_copies( + 8)]] double u2_array[kMaxNSteps3][OUTER_UNROLL_POW2]; + + // p1powu_array precalculates p1 multipy the power of u. + [[intelfpga::memory, intelfpga::singlepump, + intelfpga::bankwidth(sizeof(double)), + intelfpga::numbanks(INNER_UNROLL * OUTER_UNROLL_POW2), + intelfpga::private_copies( + 8)]] double p1powu_array[kMaxNSteps3][OUTER_UNROLL_POW2]; + + // n0_optval stores the binomial tree value corresponding to node 0 + // of a level. 
This is the same as what's stored in + // optval/init_optval, but replicating this data allows us to have + // only one read port for optval and init_optval, thereby removing + // the need of double-pumping or replication. n0_optval_2 is a copy + // of n0_optval that stores the node 0 value for a specific layer of + // the tree. pgreek is the array saving values for post-calculating + // Greeks. + [[intelfpga::register]] double n0_optval[OUTER_UNROLL]; + [[intelfpga::register]] double n0_optval_2[OUTER_UNROLL]; + [[intelfpga::register]] double pgreek[4][OUTER_UNROLL]; + + // L1 + L2: + // Populate init_optval -- calculate the last level of the binomial + // tree. + for (short ic = 0; ic < OUTER_UNROLL; ++ic) { + // Transfer data from DRAM to local memory or registers + const int c = oc * OUTER_UNROLL + ic; + const CRRMeta param = accessor_v[c]; + + u[ic] = param.u; + c1[ic] = param.c1; + c2[ic] = param.c2; + param_1[ic] = param.param_1; + param_2[ic] = param.param_2; + n_steps[ic] = param.n_steps; + + for (short t = steps; t >= 0; --t) { + const ArrayEle param_array = accessor_v2[c].array_eles[t]; + + const double init_val = param_array.init_optval; + + init_optval[t][ic] = init_val; + + // n0_optval intends to store the node value at t == 0. + // Instead of qualifying this statement by an "if (t == 0)", + // which couples the loop counter to the timing path of the + // assignment, we reverse the loop direction so the last value + // stored corresponds to t == 0. + n0_optval[ic] = init_val; + + // Transfer data from DRAM to local memory or registers + u2_array[t][ic] = param_array.u2; + p1powu_array[t][ic] = param_array.p1powu; + } + } + + // L3: + // Update optval[] -- calculate each level of the binomial tree. + // reg[] helps to achieve updating INNER_UNROLL elements in optval[] + // simultaneously. 
+ [[intelfpga::disable_loop_pipelining]] for (short t = 0; + t <= steps - 1; ++t) { + [[intelfpga::register]] double reg[INNER_UNROLL + 1][OUTER_UNROLL]; + + double val_1, val_2; + + #pragma unroll + for (short ic = 0; ic < OUTER_UNROLL; ++ic) { + reg[0][ic] = n0_optval[ic]; + } + + // L4: + // Calculate all the elements in optval[] -- all the tree nodes + // for one level of the tree + [[intelfpga::ivdep]] for (int n = 0; n <= steps - 1 - t; + n += INNER_UNROLL) { + + #pragma unroll + for (short ic = 0; ic < OUTER_UNROLL; ++ic) { + + #pragma unroll + for (short ri = 1; ri <= INNER_UNROLL; ++ri) { + reg[ri][ic] = + (t == 0) ? init_optval[n + ri][ic] : optval[n + ri][ic]; + } + + #pragma unroll + for (short ri = 0; ri < INNER_UNROLL; ++ri) { + const double val = sycl::fmax( + c1[ic] * reg[ri][ic] + c2[ic] * reg[ri + 1][ic], + p1powu_array[t][ic] * u2_array[n + ri][ic] - + param_2[ic]); + + optval[n + ri][ic] = val; + if (n + ri == 0) { + n0_optval[ic] = val; + } + if (n + ri == 1) { + val_1 = val; + } + if (n + ri == 2) { + val_2 = val; + } + } + + reg[0][ic] = reg[INNER_UNROLL][ic]; + + if (t == steps - 5) { + pgreek[3][ic] = val_2; + } + if (t == steps - 3) { + pgreek[0][ic] = n0_optval[ic]; + pgreek[1][ic] = val_1; + pgreek[2][ic] = val_2; + n0_optval_2[ic] = n0_optval[ic]; + } + } + } + } + + // L5: transfer crr_res_paramss to DRAM + #pragma unroll + for (short ic = 0; ic < OUTER_UNROLL; ++ic) { + const int c = oc * OUTER_UNROLL + ic; + if (n_steps[ic] < steps) { + accessor_r[c].optval0 = n0_optval_2[ic]; + } else { + accessor_r[c].optval0 = n0_optval[ic]; + } + accessor_r[c].pgreek[0] = pgreek[0][ic]; + accessor_r[c].pgreek[1] = pgreek[1][ic]; + accessor_r[c].pgreek[2] = pgreek[2][ic]; + accessor_r[c].pgreek[3] = pgreek[3][ic]; + } + // Increment counters + oc += 1; + } while (oc < n_crr_div); + }); + }); + } + } + + double diff = timer.Elapsed(); + return diff; +} + +void ReadInputFromFile(ifstream &input_file, vector &inp) { + string line_of_args; + while 
(getline(input_file, line_of_args)) { + InputData temp; + istringstream line_of_args_ss(line_of_args); + line_of_args_ss >> temp.n_steps; + line_of_args_ss.ignore(1, ','); + line_of_args_ss >> temp.cp; + line_of_args_ss.ignore(1, ','); + line_of_args_ss >> temp.spot; + line_of_args_ss.ignore(1, ','); + line_of_args_ss >> temp.fwd; + line_of_args_ss.ignore(1, ','); + line_of_args_ss >> temp.strike; + line_of_args_ss.ignore(1, ','); + line_of_args_ss >> temp.vol; + line_of_args_ss.ignore(1, ','); + line_of_args_ss >> temp.df; + line_of_args_ss.ignore(1, ','); + line_of_args_ss >> temp.t; + /* Keep only rows whose eight fields all parsed; a blank or short row would otherwise append uninitialized data. */ + if (line_of_args_ss) inp.push_back(temp); + } +} + +static string ToStringWithPrecision(const double value, const int p = 6) { + ostringstream out; + out.precision(p); + out << std::fixed << value; + return out.str(); +} + +void WriteOutputToFile(ofstream &output_file, const vector &outp) { + size_t n = outp.size(); + for (size_t i = 0; i < n; ++i) { + OutputRes temp; + temp = outp[i]; + string line = ToStringWithPrecision(temp.value, 12) + " " + + ToStringWithPrecision(temp.delta, 12) + " " + + ToStringWithPrecision(temp.gamma, 12) + " " + + ToStringWithPrecision(temp.vega, 12) + " " + + ToStringWithPrecision(temp.theta, 12) + " " + + ToStringWithPrecision(temp.rho, 12) + "\n"; + + output_file << line; + } +} +/* Searches arg for the key str. On a hit, copies the characters that follow the key (up to the first space, tab or NUL, and at most maxchars - 1 of them) into str_value, NUL-terminates it and returns true; returns false and leaves str_value untouched when the key is absent. */ +bool FindGetArgString(const string &arg, const char *str, char *str_value, + size_t maxchars) { + size_t found = arg.find(str, 0, strlen(str)); + if (found != string::npos) { + const char *sptr = &arg.c_str()[found + strlen(str)]; /* value starts right after the matched key, wherever it was found */ + for (size_t i = 0; i + 1 < maxchars; i++) { /* i + 1 < maxchars cannot underflow when maxchars is 0 */ + char ch = sptr[i]; + switch (ch) { + case ' ': + case '\t': + case '\0': + str_value[i] = 0; + return true; + default: + str_value[i] = ch; + break; + } + } + if (maxchars > 0) str_value[maxchars - 1] = 0; /* value was truncated: still hand back a terminated string */ + return true; + } + return false; +} + +// Perform data pre-processing work +// Three different option prices are required to solve each CRR problem +// The following lists why each option price is required: +// [0] : Used to compute Premium, Delta, Gamma and Theta
+// [1] : Used to compute Rho +// [2] : Used to compute Vega +CRRInParams PrepareData(const InputData &inp) { + CRRInParams in_params; + in_params.n_steps = inp.n_steps; + + double r[2]; + r[0] = pow(inp.df, 1.0 / inp.n_steps); + double d_df = exp(-inp.t * kEpsilon); + r[1] = pow(inp.df * d_df, 1.0 / inp.n_steps); + in_params.u[0] = exp(inp.vol * sqrt(inp.t / inp.n_steps)); + in_params.u[1] = in_params.u[0]; + in_params.u[2] = exp((inp.vol + kEpsilon) * sqrt(inp.t / inp.n_steps)); + + in_params.u2[0] = in_params.u[0] * in_params.u[0]; + in_params.u2[1] = in_params.u[1] * in_params.u[1]; + in_params.u2[2] = in_params.u[2] * in_params.u[2]; + in_params.umin[0] = inp.spot * pow(1 / in_params.u[0], inp.n_steps + kOpt0); + in_params.umin[1] = inp.spot * pow(1 / in_params.u[1], inp.n_steps); + in_params.umin[2] = inp.spot * pow(1 / in_params.u[2], inp.n_steps); + in_params.c1[0] = + r[0] * (in_params.u[0] - pow(inp.fwd / inp.spot, 1.0 / inp.n_steps)) / + (in_params.u[0] - 1 / in_params.u[0]); + in_params.c1[1] = + r[1] *(in_params.u[1] - pow((inp.fwd / d_df) / inp.spot, 1.0 / inp.n_steps)) / + (in_params.u[1] - 1 / in_params.u[1]); + in_params.c1[2] = + r[0] * (in_params.u[2] - pow(inp.fwd / inp.spot, 1.0 / inp.n_steps)) / + (in_params.u[2] - 1 / in_params.u[2]); + in_params.c2[0] = r[0] - in_params.c1[0]; + in_params.c2[1] = r[1] - in_params.c1[1]; + in_params.c2[2] = r[0] - in_params.c1[2]; + + in_params.param_1[0] = inp.cp * in_params.umin[0]; + in_params.param_1[1] = inp.cp * in_params.umin[1]; + in_params.param_1[2] = inp.cp * in_params.umin[2]; + in_params.param_2 = inp.cp * inp.strike; + + return in_params; +} + +CRRArrayEles PrepareArrData(const CRRInParams &in) { + CRRArrayEles arr; + + // Write in reverse t-direction to match kernel access pattern + for (int i = 0; i <= in.n_steps + kOpt0; ++i) { + for (int inner_func_index = 0; inner_func_index < 3; ++inner_func_index) { + arr.array_eles[i][inner_func_index].u2 = pow(in.u2[inner_func_index], i); + 
arr.array_eles[i][inner_func_index].p1powu = + in.param_1[inner_func_index] * pow(in.u[inner_func_index], i + 1); + arr.array_eles[i][inner_func_index].init_optval = + fmax(in.param_1[inner_func_index] * pow(in.u2[inner_func_index], i) - + in.param_2, 0.0); + } + } + + return arr; +} + +// Metadata, used in the Kernel, is generated from the input data +// Each CRR problem is split into 3 subproblems to calculate +// each required option price separately +void PrepareKernelData(vector &in_params, + vector &array_params, + vector &in_buff_params, + vector &in_buff2_params, + const int n_crrs) { + + constexpr short offset = 0; + + for (int wi_idx = offset, dst = offset * 3; wi_idx < n_crrs; ++wi_idx) { + CRRInParams &src_crr_params = in_params[wi_idx]; + + CRRArrayEles &src_crr_eles = array_params[wi_idx]; + + for (int inner_func_index = 0; inner_func_index < 3; + ++inner_func_index, ++dst) { + CRRMeta &dst_crr_meta = in_buff_params[dst]; + CRRPerStepMeta &dst_crr_per_step_meta = in_buff2_params[dst]; + + dst_crr_meta.u = src_crr_params.u[inner_func_index]; + dst_crr_meta.c1 = src_crr_params.c1[inner_func_index]; + dst_crr_meta.c2 = src_crr_params.c2[inner_func_index]; + + dst_crr_meta.param_1 = src_crr_params.param_1[inner_func_index]; + dst_crr_meta.param_2 = src_crr_params.param_2; + + if (inner_func_index == 0) { + dst_crr_meta.n_steps = src_crr_params.n_steps + kOpt0; + } else { + dst_crr_meta.n_steps = src_crr_params.n_steps; + } + for (int i = 0; i <= kMaxNSteps2; ++i) { + dst_crr_per_step_meta.array_eles[i].u2 = + src_crr_eles.array_eles[i][inner_func_index].u2; + dst_crr_per_step_meta.array_eles[i].p1powu = + src_crr_eles.array_eles[i][inner_func_index].p1powu; + dst_crr_per_step_meta.array_eles[i].init_optval = + src_crr_eles.array_eles[i][inner_func_index].init_optval; + } + } + } +} + +// Takes in the result from the kernel and stores the 3 option prices +// belonging to the same CRR problem in one InterRes element +void ProcessKernelResult(const vector 
&res_params, + vector &postp_buff, const int n_crrs) { + constexpr int offset = 0; + + for (int wi_idx = offset, src = offset * 3; wi_idx < n_crrs; ++wi_idx) { + InterRes &dst_res = postp_buff[wi_idx]; + + for (int inner_func_index = 0; inner_func_index < 3; + ++inner_func_index, ++src) { + const CRRResParams &src_res = res_params[src]; + + for (int i = 0; i < 4; ++i) { + if (inner_func_index == 0) { + dst_res.pgreek[i] = src_res.pgreek[i]; + } + } + + dst_res.vals[inner_func_index] = src_res.optval0; + } + } +} + +// Computes the Premium and Greeks +OutputRes ComputeOutput(const InputData &inp, const CRRInParams &in_params, + const InterRes &res_params) { + double h; + OutputRes res; + h = inp.spot * (in_params.u2[0] - 1 / in_params.u2[0]); + res.value = res_params.pgreek[1]; + res.delta = (res_params.pgreek[2] - res_params.pgreek[0]) / h; + res.gamma = 2 / h * + ((res_params.pgreek[2] - res_params.pgreek[1]) / inp.spot / + (in_params.u2[0] - 1) - + (res_params.pgreek[1] - res_params.pgreek[0]) / inp.spot / + (1 - (1 / in_params.u2[0]))); + res.theta = + (res_params.vals[0] - res_params.pgreek[3]) / 4 / inp.t * inp.n_steps; + res.rho = (res_params.vals[1] - res.value) / kEpsilon; + res.vega = (res_params.vals[2] - res.value) / kEpsilon; + return res; +} + +// Perform CRR solving using the CPU and compare FPGA resutls with CPU results +// to test correctness. +void TestCorrectness(int k, int n_crrs, bool &pass, const InputData &inp, + CRRInParams &vals, const OutputRes &fpga_res) { + if (k == 0) { + std::cout << "\n============= Correctness Test ============= \n"; + std::cout << "Running analytical correctness checks... 
\n"; + } + + // This CRR benchmark ensures a minimum 4 decimal points match between FPGA and CPU + // "threshold" is chosen to enforce this guarantee + float threshold = 0.00001; + int i, j, q; + double x; + int n_steps = vals.n_steps; + int m = n_steps + kOpt0; + vector pvalue(kMaxNSteps3); + vector pvalue_1(kMaxNSteps1); + vector pvalue_2(kMaxNSteps1); + vector pgreek(5); + InterRes cpu_res_params; + OutputRes cpu_res; + + // option value computed at each final node + x = vals.umin[0]; + for (i = 0; i <= m; i++, x *= vals.u2[0]) { + pvalue[i] = fmax(inp.cp * (x - inp.strike), 0.0); + } + + // backward recursion to evaluate option price + for (i = m - 1; i >= 0; i--) { + vals.umin[0] *= vals.u[0]; + x = vals.umin[0]; + for (j = 0; j <= i; j++, x *= vals.u2[0]) { + pvalue[j] = fmax(vals.c1[0] * pvalue[j] + vals.c2[0] * pvalue[j + 1], + inp.cp * (x - inp.strike)); + } + if (i == 4) { + pgreek[4] = pvalue[2]; + } + if (i == 2) { + for (q = 0; q <= 2; q++) { + pgreek[q + 1] = pvalue[q]; + } + } + } + cpu_res_params.vals[0] = pvalue[0]; + + // the above computation is repeated for each option price + x = vals.umin[1]; + for (i = 0; i <= n_steps; i++, x *= vals.u2[1]) { + pvalue_1[i] = fmax(inp.cp * (x - inp.strike), 0.0); + } + + for (i = n_steps - 1; i >= 0; i--) { + vals.umin[1] *= vals.u[1]; + x = vals.umin[1]; + + for (j = 0; j <= i; j++, x *= vals.u2[1]) { + pvalue_1[j] = + fmax(vals.c1[1] * pvalue_1[j] + vals.c2[1] * pvalue_1[j + 1], + inp.cp * (x - inp.strike)); + } + } + cpu_res_params.vals[1] = pvalue_1[0]; + + x = vals.umin[2]; + for (i = 0; i <= n_steps; i++, x *= vals.u2[2]) { + pvalue_2[i] = fmax(inp.cp * (x - inp.strike), 0.0); + } + + for (i = n_steps - 1; i >= 0; i--) { + vals.umin[2] *= vals.u[2]; + x = vals.umin[2]; + for (j = 0; j <= i; j++, x *= vals.u2[2]) { + pvalue_2[j] = + fmax(vals.c1[2] * pvalue_2[j] + vals.c2[2] * pvalue_2[j + 1], + inp.cp * (x - inp.strike)); + } + } + cpu_res_params.vals[2] = pvalue_2[0]; + pgreek[0] = 0; + + for (i = 1; i 
< 5; ++i) { + cpu_res_params.pgreek[i - 1] = pgreek[i]; + } + + cpu_res = ComputeOutput(inp, vals, cpu_res_params); + + if (abs(cpu_res.value - fpga_res.value) > threshold) { + pass = false; + std::cout << "fpga_res.value " << k << " = " << std::fixed + << std::setprecision(20) << fpga_res.value << "\n"; + std::cout << "cpu_res.value " << k << " = " << std::fixed + << std::setprecision(20) << cpu_res.value << "\n"; + std::cout << "Mismatch detected for value of crr " << k << "\n"; + } + if (abs(cpu_res.delta - fpga_res.delta) > threshold) { + pass = false; + std::cout << "fpga_res.delta " << k << " = " << std::fixed + << std::setprecision(20) << fpga_res.delta << "\n"; + std::cout << "cpu_res.delta " << k << " = " << std::fixed + << std::setprecision(20) << cpu_res.delta << "\n"; + std::cout << "Mismatch detected for value of crr " << k << "\n"; + } + if (abs(cpu_res.gamma - fpga_res.gamma) > threshold) { + pass = false; + std::cout << "fpga_res.gamma " << k << " = " << std::fixed + << std::setprecision(20) << fpga_res.gamma << "\n"; + std::cout << "cpu_res.gamma " << k << " = " << std::fixed + << std::setprecision(20) << cpu_res.gamma << "\n"; + std::cout << "Mismatch detected for value of crr " << k << "\n"; + } + if (abs(cpu_res.vega - fpga_res.vega) > threshold) { + pass = false; + std::cout << "fpga_res.vega " << k << " = " << std::fixed + << std::setprecision(20) << fpga_res.vega << "\n"; + std::cout << "cpu_res.vega " << k << " = " << std::fixed + << std::setprecision(20) << cpu_res.vega << "\n"; + std::cout << "Mismatch detected for value of crr " << k << "\n"; + } + if (abs(cpu_res.theta - fpga_res.theta) > threshold) { + pass = false; + std::cout << "fpga_res.theta " << k << " = " << std::fixed + << std::setprecision(20) << fpga_res.theta << "\n"; + std::cout << "cpu_res.theta " << k << " = " << std::fixed + << std::setprecision(20) << cpu_res.theta << "\n"; + std::cout << "Mismatch detected for value of crr " << k << "\n"; + } + if (abs(cpu_res.rho - 
fpga_res.rho) > threshold) { + pass = false; + std::cout << "fpga_res.rho " << k << " = " << std::fixed + << std::setprecision(20) << fpga_res.rho << "\n"; + std::cout << "cpu_res.rho " << k << " = " << std::fixed + << std::setprecision(20) << cpu_res.rho << "\n"; + std::cout << "Mismatch detected for value of crr " << k << "\n"; + } + + if (k == n_crrs - 1) { + std::cout << "CPU-FPGA Equivalence: " << (pass ? "PASS" : "FAIL") << "\n"; + } +} + +// Print out the achieved CRR throughput +void TestThroughput(const double &time, const int &n_crrs) { + std::cout << "\n============= Throughput Test =============\n"; + + std::cout << " Avg throughput: " << std::fixed << std::setprecision(1) + << (n_crrs / time) << " assets/s\n"; +} + +int main(int argc, char *argv[]) { + string infilename = ""; + string outfilename = ""; + + const string default_ifile = "src/data/ordered_inputs.csv"; + const string default_ofile = "src/data/ordered_outputs.csv"; + + char str_buffer[kMaxStringLen] = {0}; + for (int i = 1; i < argc; i++) { + if (argv[i][0] == '-') { + string sarg(argv[i]); + + FindGetArgString(sarg, "-o=", str_buffer, kMaxStringLen); + FindGetArgString(sarg, "--output-file=", str_buffer, kMaxStringLen); + } else { + infilename = string(argv[i]); + } + } + + try { +#if defined(FPGA_EMULATOR) + intel::fpga_emulator_selector device_selector; +#else + intel::fpga_selector device_selector; +#endif + + queue q(device_selector, dpc_common::exception_handler); + + std::cout << "Running on device: " + << q.get_device().get_info().c_str() << "\n"; + + device device = q.get_device(); + std::cout << "Device name: " + << device.get_info().c_str() << "\n \n \n"; + + vector inp; + + // Get input file name, if users don't have their test input file, this + // design will use the default input file + if (infilename == "") { + infilename = default_ifile; + } + ifstream inputFile(infilename); + + if (!inputFile.is_open()) { + std::cerr << "Input file doesn't exist \n"; + return 1; + } + + // 
Check input file format + string filename = infilename; + std::size_t found = filename.find_last_of("."); + if (!(filename.substr(found + 1).compare("csv") == 0)) { + std::cerr << "Input file format only support .csv\n"; + return 1; + } + + // Get output file name, if users don't define output file name, the design + // will use the default output file + outfilename = default_ofile; + if (strlen(str_buffer)) { + outfilename = string(str_buffer); + } + + // Check output file format + filename = outfilename; + found = filename.find_last_of("."); + if (!(filename.substr(found + 1).compare("csv") == 0)) { + std::cerr << "Output file format only support .csv\n"; + return 1; + } + + // Read inputs data from input file + ReadInputFromFile(inputFile, inp); + +// Get the number of data from the input file +// Emulator mode only goes through one input (or through OUTER_UNROLL inputs) to +// ensure fast runtime +#if defined(FPGA_EMULATOR) + int temp_crrs = 1; +#else + int temp_crrs = inp.size(); +#endif + + // Check if n_crrs >= OUTER_UNROLL + if (OUTER_UNROLL >= temp_crrs) { + if (inp.size() < OUTER_UNROLL) { + std::cerr << "Input size must be greater than or equal to OUTER_UNROLL\n"; + return 1; + } else { + temp_crrs = OUTER_UNROLL; + } + } + + const int n_crrs = temp_crrs; + + vector in_params(n_crrs); + vector array_params(n_crrs); + + for (int j = 0; j < n_crrs; ++j) { + in_params[j] = PrepareData(inp[j]); + array_params[j] = PrepareArrData(in_params[j]); + } + + // following vectors are arguments for CrrSolver + vector in_buff_params(n_crrs * 3); + vector in_buff2_params(n_crrs * 3); + + vector res_params(n_crrs * 3); + vector res_params_dummy(n_crrs * 3); + + // Prepare metadata as input to kernel + PrepareKernelData(in_params, array_params, in_buff_params, in_buff2_params, + n_crrs); + + // warmup run - use this run to warmup accelerator + CrrSolver(n_crrs, in_buff_params, res_params_dummy, in_buff2_params, + q); + // Timed run - profile performance + double time = 
CrrSolver(n_crrs, in_buff_params, res_params, + in_buff2_params, q); + bool pass = true; + + // Postprocessing step + // process_res used to compute final results + vector process_res(n_crrs); + ProcessKernelResult(res_params, process_res, n_crrs); + + vector result(n_crrs); + for (int i = 0; i < n_crrs; ++i) { + result[i] = ComputeOutput(inp[i], in_params[i], process_res[i]); + TestCorrectness(i, n_crrs, pass, inp[i], in_params[i], result[i]); + } + + // Write outputs data to output file + ofstream outputFile(outfilename); + + WriteOutputToFile(outputFile, result); + + TestThroughput(time, n_crrs); + + } catch (sycl::exception const &e) { + std::cout << "Caught a synchronous SYCL exception: " << e.what() << "\n"; + std::cout << " If you are targeting an FPGA hardware, " + "ensure that your system is plugged to an FPGA board that is " + "set up correctly\n"; + std::cout << " If you are targeting the FPGA emulator, compile with " + "-DFPGA_EMULATOR\n"; + return 1; + } + return 0; +} diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/CMakeLists.txt b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/CMakeLists.txt new file mode 100755 index 0000000000..9ac77b0aff --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/CMakeLists.txt @@ -0,0 +1,11 @@ +set(CMAKE_CXX_COMPILER "dpcpp") + +cmake_minimum_required (VERSION 2.8) + +project(GZip) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +add_subdirectory (src) diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/License.txt b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/License.txt new file mode 100755 index 0000000000..e63c6e13dc --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/License.txt @@ -0,0 +1,7 @@ +Copyright Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and 
associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/README.md b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/README.md new file mode 100755 index 0000000000..18117a82a5 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/README.md @@ -0,0 +1,201 @@ +# GZIP Compression +Reference design demonstrating high-performance GZIP compression on FPGA. + +***Documentation***: The [oneAPI DPC++ FPGA Optimization Guide](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide) provides comprehensive instructions for targeting FPGAs through DPC++. The [oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide) is a general resource for target-independent DPC++ programming. Additional reference material specific to this GZIP implementation is provided in the References section of this README. + +| Optimized for | Description +--- |--- +| OS | Linux* Ubuntu* 18.04; Windows* 10 +| Hardware | Intel® Programmable Acceleration Card (PAC) with Intel Arria® 10 GX FPGA;
Intel® Programmable Acceleration Card (PAC) with Intel Stratix® 10 SX FPGA +| Software | Intel® oneAPI DPC++ Compiler (Beta)
Intel® FPGA Add-On for oneAPI Base Toolkit +| What you will learn | How to implement a high performance multi-engine compression algorithm on FPGA +| Time to complete | 1 hr (not including compile time) + +_Notice: Limited support in Windows*; compiling for FPGA hardware is not supported in Windows*_ + +**Performance** +Please refer to the performance disclaimer at the end of this README. + +| Device | Throughput +|:--- |:--- +| Intel® PAC with Intel Arria® 10 GX FPGA | 1 engine @ 3.4 GB/s +| Intel® PAC with Intel Stratix® 10 SX FPGA | 2 engines @ 5.5 GB/s each = 11.0 GB/s total + + +## Purpose + +This DPC++ reference design implements a compression algorithm. The implementation is optimized for the FPGA device. The compression result is GZIP-compatible and can be decompressed with GUNZIP. The GZIP output file format is compatible with GZIP's DEFLATE algorithm, and follows a fixed subset of [RFC 1951](https://www.ietf.org/rfc/rfc1951.txt). See the References section for more specific references. + +The algorithm uses a GZIP-compatible Lempel-Ziv 77 (LZ77) algorithm for data de-duplication, and a GZIP-compatible Static Huffman algorithm for bit reduction. The implementation includes three FPGA accelerated tasks (LZ77, Static Huffman and CRC). + +The FPGA implementation of the algorithm enables either one or two independent GZIP compute engines to operate in parallel on the FPGA. The number of engines is constrained by the available FPGA resources. By default, the design is parameterized to create a single engine when the design is compiled targeting Intel® PAC with Intel Arria® 10 GX FPGA. Two engines are created when targeting Intel® PAC with Intel Stratix® 10 SX FPGA, a larger device. + +## Key Implementation Details + + | Kernel | Description +--- |--- +| LZ Reduction | Implements an LZ77 algorithm for data de-duplication. The algorithm produces distance and length information that is compatible with GZIP's DEFLATE implementation. 
+| Static Huffman | Uses the same Static Huffman codes used by GZIP's DEFLATE algorithm when it chooses a Static Huffman coding scheme for bit reduction. This choice maintains compatibility with GUNZIP. +| CRC | Adds a CRC checksum based on the input file; this is required by the gzip file format + +To optimize performance, GZIP leverages techniques discussed in the following FPGA tutorials: +* **Double Buffering to Overlap Kernel Execution with Buffer Transfers and Host Processing** (double_buffering) +* **On-Chip Memory Attributes** (mem_config) + + +## License +This code sample is licensed under MIT license. + + +## Building the `gzip` Reference Design + +### Include Files +The included header `dpc_common.hpp` is located at `%ONEAPI_ROOT%\dev-utilities\latest\include` on your development system. + +### Running Samples in DevCloud +If running a sample in the Intel DevCloud, remember that you must specify the compute node (fpga_compile or fpga_runtime) as well as whether to run in batch or interactive mode. For more information see the Intel® oneAPI Base Toolkit Get Started Guide ([https://devcloud.intel.com/oneapi/get-started/base-toolkit/](https://devcloud.intel.com/oneapi/get-started/base-toolkit/)). + +When compiling for FPGA hardware, it is recommended to increase the job timeout to 24h. + +### On a Linux* System + +1. Generate the `Makefile` by running `cmake`. + ``` + mkdir build + cd build + ``` + To compile for the Intel® PAC with Intel Arria® 10 GX FPGA, run `cmake` using the command: + ``` + cmake .. + ``` + Alternatively, to compile for the Intel® PAC with Intel Stratix® 10 SX FPGA, run `cmake` using the command: + + ``` + cmake .. -DFPGA_BOARD=intel_s10sx_pac:pac_s10 + ``` + +2. Compile the design through the generated `Makefile`. 
The following build targets are provided, matching the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + make fpga_emu + ``` + * Generate the optimization report: + ``` + make report + ``` + * Compile for FPGA hardware (longer compile time, targets FPGA device): + ``` + make fpga + ``` +3. (Optional) As the above hardware compile may take several hours to complete, an Intel® PAC with Intel Arria® 10 GX FPGA precompiled binary can be downloaded here. + +### On a Windows* System +Note: `cmake` is not yet supported on Windows. A build.ninja file is provided instead. + +1. Enter the source file directory. + ``` + cd src + ``` + +2. Compile the design. The following build targets are provided, matching the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + ninja fpga_emu + ``` + + * Generate the optimization report: + + ``` + ninja report + ``` + If you are targeting Intel® PAC with Intel Stratix® 10 SX FPGA, instead use: + ``` + ninja report_s10_pac + ``` + * Compiling for FPGA hardware is not yet supported on Windows. + + ### In Third-Party Integrated Development Environments (IDEs) + +You can compile and run this tutorial in the Eclipse* IDE (in Linux*) and the Visual Studio* IDE (in Windows*). For instructions, refer to the following link: [Intel® oneAPI DPC++ FPGA Workflows on Third-Party IDEs](https://software.intel.com/en-us/articles/intel-oneapi-dpcpp-fpga-workflow-on-ide) + + +## Running the Reference Design + + 1. Run the sample on the FPGA emulator (the kernel executes on the CPU): + ``` + ./gzip.fpga_emu [-o=] (Linux) + gzip.fpga_emu.exe [-o=] (Windows) + ``` +2. Run the sample on the FPGA device: + ``` + ./gzip.fpga [-o=] (Linux) + ``` + ### Application Parameters + +| Argument | Description +--- |--- +| `` | Mandatory argument that specifies the file to be compressed. Use a 120+ MB file to achieve peak performance. 
+| `-o=` | Optional argument that specifies the name of the output file. The default name of the output file is `.gz`. When targeting Intel Stratix® 10 SX, the single `` is fed to both engines, yielding two identical output files, using `` as the basis for the filenames. + +### Example of Output + +``` +Running on device: pac_a10 : Intel PAC Platform (pac_ee00000) +Throughput: 3.4321 GB/s +Compression Ratio 33.2737% +PASSED +``` +## Additional Design Information +### Source Code Explanation + +| File | Description +--- |--- +| `gzip.cpp` | Contains the `main()` function and the top-level interfaces to the SYCL* GZIP functions. +| `gzipkernel.cpp` | Contains the SYCL* kernels used to implement GZIP. +| `CompareGzip.cpp` | Contains code to compare a GZIP-compatible file with the original input. +| `WriteGzip.cpp` | Contains code to write a GZIP compatible file. +| `crc32.cpp` | Contains code to calculate a 32-bit CRC that is compatible with the GZIP file format and to combine multiple 32-bit CRC values. It is used to account only for the CRC of the last few bytes in the file, which are not processed by the accelerated CRC kernel. +| `kernels.hpp` | Contains miscellaneous defines and structure definitions required by the LZReduction and Static Huffman kernels. +| `crc32.hpp` | Header file for `crc32.cpp`. +| `gzipkernel.hpp` | Header file for `gzipkernel.cpp`. +| `CompareGzip.hpp` | Header file for `CompareGzip.cpp`. +| `WriteGzip.hpp` | Header file for `WriteGzip.cpp`. 
+ +### Compiler Flags Used + +| Flag | Description +--- |--- +`-Xshardware` | Target FPGA hardware (as opposed to FPGA emulator) +`-Xsparallel=2` | Uses 2 cores when compiling the bitstream through Quartus +`-Xsseed=1` | Uses seed 1 during Quartus, yields slightly higher fmax +`-Xsnum-reorder=6` | On Intel Stratix® 10 SX only, specify a wider data path for read data from global memory +`-DNUM_ENGINES=<1\|2>` | Specifies that 1 GZIP engine should be compiled when targeting Arria® 10 GX and 2 engines when targeting Intel Stratix® 10 SX + + +### Performance disclaimers + +Tests document performance of components on a particular test, in specific systems. Differences in hardware, software, or configuration will affect actual performance. Consult other sources of information to evaluate performance as you consider your purchase. For more complete information about performance and benchmark results, visit [www.intel.com/benchmarks](https://www.intel.com/benchmarks). + +Performance results are based on testing as of July 29, 2020 and may not reflect all publicly available security updates. See configuration disclosure for details. No product or component can be absolutely secure. + +Intel technologies’ features and benefits depend on system configuration and may require enabled hardware, software or service activation. Performance varies depending on system configuration. Check with your system manufacturer or retailer or learn more at [intel.com](https://www.intel.com). + +The performance was measured by Intel on July 29, 2020 + +Intel and the Intel logo are trademarks of Intel Corporation or its subsidiaries in the U.S. and/or other countries. + +(C) Intel Corporation. 
+ +### References +[Khronos SYCL Resources](https://www.khronos.org/sycl/resources) + +[Intel GZIP OpenCL Design Example](https://www.intel.com/content/www/us/en/programmable/support/support-resources/design-examples/design-software/opencl/gzip-compression.html) + +[RFC 1951 - DEFLATE Data Format](https://www.ietf.org/rfc/rfc1951.txt) + +[RFC 1952 - GZIP Specification 4.3](https://www.ietf.org/rfc/rfc1952.txt) + +[OpenCL Intercept Layer](https://github.com/intel/opencl-intercept-layer) + diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/Zlib_License.txt b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/Zlib_License.txt new file mode 100755 index 0000000000..a75dd96a90 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/Zlib_License.txt @@ -0,0 +1,25 @@ +zlib License + + zlib.h -- interface of the 'zlib' general purpose compression library + version 1.2.11, January 15th, 2017 + + Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. 
+ + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/gzip.sln b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/gzip.sln new file mode 100755 index 0000000000..580f35f08b --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/gzip.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.28307.705 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gzip", "gzip.vcxproj", "{CF6A576B-665D-4F24-BB62-0DAE7A7B3C64}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {CF6A576B-665D-4F24-BB62-0DAE7A7B3C64}.Debug|x64.ActiveCfg = Debug|x64 + {CF6A576B-665D-4F24-BB62-0DAE7A7B3C64}.Debug|x64.Build.0 = Debug|x64 + {CF6A576B-665D-4F24-BB62-0DAE7A7B3C64}.Release|x64.ActiveCfg = Release|x64 + {CF6A576B-665D-4F24-BB62-0DAE7A7B3C64}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {92BEFAAB-0365-4E5A-9C4A-E50AB49B2A6B} + EndGlobalSection +EndGlobal diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/gzip.vcxproj b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/gzip.vcxproj new file mode 100755 index 0000000000..cf6a2462d2 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/gzip.vcxproj @@ -0,0 +1,174 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + + + + + + + + + + + + + + + + + 15.0 + {cf6a576b-665d-4f24-bb62-0dae7a7b3c64} + Win32Proj + gzip + $(WindowsSDKVersion.Replace("\","")) + + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI 
DPC++ Compiler + true + Unicode + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + + + + + + + + + + + + + + + + + + + true + + + true + + + false + + + false + + + + Use + Level3 + Disabled + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + Disabled + true + true + pch.h + true + -DFPGA_EMULATOR %(AdditionalOptions) + + + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + true + -DFPGA_EMULATOR %(AdditionalOptions) + + + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + + \ No newline at end of file diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/gzip.vcxproj.user b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/gzip.vcxproj.user new file mode 100755 index 0000000000..1956841792 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/gzip.vcxproj.user @@ -0,0 +1,14 @@ + + + + false + + + src/gzip.cpp -o=test.gz + WindowsLocalDebugger + + + src/gzip.cpp -o=test.gz + WindowsLocalDebugger + + \ No newline at end of file diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/sample.json b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/sample.json new file mode 100755 index 0000000000..a6d65ecd17 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/sample.json @@ -0,0 +1,51 @@ +{ + "guid": "D55081EB-669D-4832-BCE6-23EE2ACA9F0F", + "name": "GZIP Compression", + "categories": ["Toolkit/Intel® oneAPI Base Toolkit/FPGA/Reference Designs"], + "description": "Reference design demonstrating high-performance GZIP compression on FPGA", + "toolchain": ["dpcpp"], + "os": ["linux", "windows"], 
+ "builder": ["ide", "cmake"], + "targetDevice": ["FPGA"], + "languages": [{"cpp":{}}], + "ciTests": { + "linux": [ + { + "id": "fpga_emu", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make fpga_emu", + "./gzip.fpga_emu ../src/gzip.cpp -o=test.gz" + ] + }, + { + "id": "report", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make report" + ] + } + ], + "windows": [ + { + "id": "fpga_emu", + "steps": [ + "cd src", + "ninja fpga_emu", + "gzip.fpga_emu.exe ../src/gzip.cpp -o=test.gz" + ] + }, + { + "id": "report", + "steps": [ + "cd src", + "ninja report" + ] + } + ] + } +} diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/CMakeLists.txt b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/CMakeLists.txt new file mode 100755 index 0000000000..bf6125045f --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/CMakeLists.txt @@ -0,0 +1,125 @@ +set(DEVICE_SOURCE_FILE gzipkernel.cpp) +set(DEVICE_HEADER_FILE gzipkernel.hpp) +set(HOST_SOURCE_FILE gzip.cpp crc32.cpp WriteGzip.cpp CompareGzip.cpp) + +set(TARGET_NAME gzip) +set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu) +set(FPGA_TARGET ${TARGET_NAME}.fpga) +set(REPORTS_TARGET ${TARGET_NAME}_report) + +# Intel supported FPGA Boards and their names +set(A10_PAC_BOARD_NAME "intel_a10gx_pac:pac_a10") +set(S10_PAC_BOARD_NAME "intel_s10sx_pac:pac_s10") + +# Design specific constant values + +# To increase NUM_ENGINES to greater than 2, must also statically declare more engines in gzipkernel.cpp --> SubmitGzipTasks() +set(NUM_ENGINES_A10 1) +set(NUM_ENGINES_S10 2) +set(NUM_REORDER "") + +# Assume target is the Intel(R) PAC with Intel Arria(R) 10 GX FPGA +SET(_FPGA_BOARD ${A10_PAC_BOARD_NAME}) +SET(NUM_ENGINES ${NUM_ENGINES_A10}) + +# Check if target is the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA +IF (NOT DEFINED FPGA_BOARD) + MESSAGE(STATUS "\tFPGA_BOARD was not specified. 
Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. Please refer to the README for more information on how to run the design on the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${A10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${S10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Stratix(R) 10 SX FPGA.") + SET(_FPGA_BOARD ${S10_PAC_BOARD_NAME}) + SET(NUM_ENGINES ${NUM_ENGINES_S10}) + set(NUM_REORDER "-Xsnum-reorder=6") + +ELSE() + MESSAGE(STATUS "\tAn invalid board name was passed in using the FPGA_BOARD flag. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. Please refer to the README for the list of valid board names.") +ENDIF() + +#specify -MMD -fsycl-link-targets=... 
instead of -fintelfpga to workaround known issue; lower report quality +set(HARDWARE_COMPILE_FLAGS -MMD -fsycl-link-targets=spir64_fpga-unknown-unknown-sycldevice -c -DNUM_ENGINES=${NUM_ENGINES}) + +# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation +separate_arguments(USER_HARDWARE_FLAGS) +set(HARDWARE_LINK_FLAGS -fintelfpga -Xshardware -Xsparallel=2 -Xsseed=1 ${NUM_REORDER} -Xsboard=${_FPGA_BOARD} ${USER_HARDWARE_FLAGS} -DNUM_ENGINES=${NUM_ENGINES}) +set(FINAL_LINK_FLAGS -fintelfpga -DNUM_ENGINES=${NUM_ENGINES}) + +set(EMULATOR_COMPILE_FLAGS "-v -v -v -g0 -fintelfpga -DFPGA_EMULATOR -DNUM_ENGINES=${NUM_ENGINES}") +set(EMULATOR_LINK_FLAGS -fintelfpga) + +# fpga emulator +if(WIN32) + set(WIN_EMULATOR_TARGET ${EMULATOR_TARGET}.exe) + add_custom_target(fpga_emu DEPENDS ${WIN_EMULATOR_TARGET}) + separate_arguments(WIN_EMULATOR_COMPILE_FLAGS WINDOWS_COMMAND "${EMULATOR_COMPILE_FLAGS}") + add_custom_command(OUTPUT ${WIN_EMULATOR_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} ${WIN_EMULATOR_COMPILE_FLAGS} /GX ${CMAKE_CURRENT_SOURCE_DIR}/${DEVICE_SOURCE_FILE} ${CMAKE_CURRENT_SOURCE_DIR}/gzip.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crc32.cpp ${CMAKE_CURRENT_SOURCE_DIR}/WriteGzip.cpp ${CMAKE_CURRENT_SOURCE_DIR}/CompareGzip.cpp -o ${CMAKE_BINARY_DIR}/${WIN_EMULATOR_TARGET} + DEPENDS ${DEVICE_SOURCE_FILE} ${HOST_SOURCE_FILE}) +else() + add_executable(${EMULATOR_TARGET} ${DEVICE_SOURCE_FILE} ${HOST_SOURCE_FILE}) + add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS ${EMULATOR_COMPILE_FLAGS}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS ${EMULATOR_LINK_FLAGS}) +endif() + +# fpga +if(WIN32) + add_custom_target(fpga + COMMAND echo "FPGA hardware flow is not supported in Windows") +else() + add_custom_target(fpga DEPENDS ${FPGA_TARGET}) + set(DEVICE_FPGA_OBJ "gzipkernel_fpga.o") + set(DEVICE_IMAGE_FPGA_OBJ "gzipkernel_fpga.a") + set(HOST_SOURCE_FILES_WITH_PATH 
${CMAKE_CURRENT_SOURCE_DIR}/gzip.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crc32.cpp ${CMAKE_CURRENT_SOURCE_DIR}/WriteGzip.cpp ${CMAKE_CURRENT_SOURCE_DIR}/CompareGzip.cpp) + + add_custom_command(OUTPUT ${DEVICE_FPGA_OBJ} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${HARDWARE_COMPILE_FLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/${DEVICE_SOURCE_FILE} -o ${DEVICE_FPGA_OBJ} + DEPENDS ${DEVICE_SOURCE_FILE} ${DEVICE_HEADER_FILE}) + + set(OBJ_FILES) + foreach(HOST_FILE ${HOST_SOURCE_FILES_WITH_PATH}) + set(HOST_FPGA_OBJ ${HOST_FILE}.o) + add_custom_command(OUTPUT ${HOST_FPGA_OBJ} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${HARDWARE_COMPILE_FLAGS} ${HOST_FILE} -o ${HOST_FPGA_OBJ} + DEPENDS ${HOST_FILE}) + list(APPEND OBJ_FILES ${HOST_FPGA_OBJ}) + endforeach() + + add_custom_command(OUTPUT ${DEVICE_IMAGE_FPGA_OBJ} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS} -fsycl-link=image ${DEVICE_FPGA_OBJ} -o ${DEVICE_IMAGE_FPGA_OBJ} + DEPENDS ${DEVICE_FPGA_OBJ} ${OBJ_FILES}) + + add_custom_command(OUTPUT ${FPGA_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${FINAL_LINK_FLAGS} ${OBJ_FILES} ${DEVICE_IMAGE_FPGA_OBJ} -o ${CMAKE_BINARY_DIR}/${FPGA_TARGET} + DEPENDS ${DEVICE_IMAGE_FPGA_OBJ} ${OBJ_FILES}) +endif() + +# fpga report +if(WIN32) + add_custom_target(report DEPENDS ${REPORTS_TARGET} ) + + separate_arguments(WIN_FLAGS WINDOWS_COMMAND) + add_custom_command(OUTPUT ${REPORTS_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} /EHsc ${WIN_FLAGS} ${HARDWARE_LINK_FLAGS} -fsycl-link ${CMAKE_CURRENT_SOURCE_DIR}/${DEVICE_SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${REPORTS_TARGET} + DEPENDS ${DEVICE_SOURCE_FILE} ${DEVICE_HEADER_FILE}) + +else() + add_custom_target(report DEPENDS ${REPORTS_TARGET} ) + + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${DEVICE_SOURCE_FILE} ${DEVICE_SOURCE_FILE} COPYONLY) + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/kernels.hpp kernels.hpp COPYONLY) + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${DEVICE_HEADER_FILE} ${DEVICE_HEADER_FILE} COPYONLY) 
+ + add_custom_command(OUTPUT ${REPORTS_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS} -fsycl-link ${DEVICE_SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${REPORTS_TARGET} + DEPENDS ${DEVICE_SOURCE_FILE} ${DEVICE_HEADER_FILE} kernels.hpp) +endif() + +# run +add_custom_target(run + COMMAND ../${TARGET_NAME}.fpga_emu Makefile -o=test.gz + DEPENDS ${TARGET_NAME}.fpga_emu) + diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/CompareGzip.cpp b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/CompareGzip.cpp new file mode 100755 index 0000000000..b803dee96b --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/CompareGzip.cpp @@ -0,0 +1,85 @@ +// ============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+//
+// This agreement shall be governed in all respects by the laws of the State of
+// California and by the laws of the United States of America.
+
+#include "CompareGzip.hpp"
+
+#include <cstdio>
+#include <cstdlib>
+
+#ifndef _MSC_VER
+#include <stdlib.h>  // mkstemp
+#include <unistd.h>  // close
+
+// Wraps `path` in single quotes for the shell started by ::system(), escaping
+// any embedded single quote, so that file names containing spaces or shell
+// metacharacters are passed to gunzip/diff verbatim instead of being
+// re-interpreted by the shell.
+static std::string ShellQuote(const std::string &path) {
+  std::string quoted = "'";
+  for (const char c : path) {
+    if (c == '\'') {
+      quoted += "'\\''";  // close quote, escaped quote, reopen quote
+    } else {
+      quoted += c;
+    }
+  }
+  quoted += "'";
+  return quoted;
+}
+#endif
+
+// Verifies a gzip file by decompressing it with the system `gunzip` into a
+// temporary file and comparing that file against the original with `diff`.
+//
+// Returns 0 on success, otherwise failure:
+//   2 - the temporary output file could not be created
+//   3 - gunzip reported an error
+//   4 - diff reported a mismatch (or an error)
+// When the compiler defines _MSC_VER the check is skipped and 0 is returned.
+int CompareGzipFiles(
+    const std::string
+        &original_file,  // original input file to compare gzip uncompressed
+    const std::string &input_gzfile)  // gzip file to check
+{
+#ifdef _MSC_VER
+  std::cout
+      << "Info: skipping output verification on Windows, no builtin gunzip\n";
+  return 0;
+#else
+  //------------------------------------------------------------------
+  // assume all good to start with.
+
+  int gzipstatus = 0;
+
+  //------------------------------------------------------------------
+  // Create temporary output filename for gunzip.
+  // mkstemp() both creates the file and returns an open descriptor: check it
+  // for failure and close it right away, since only the name is needed (the
+  // shell redirection below reopens the file).
+
+  char tmp_name[] = "/tmp/gzip_fpga.XXXXXX";
+  const int tmp_fd = mkstemp(tmp_name);
+  if (tmp_fd == -1) {
+    std::cout << "Cannot create temporary file for gunzip output\n";
+    return 2;
+  }
+  (void)close(tmp_fd);
+  const std::string outputfile = tmp_name;
+
+  //------------------------------------------------------------------
+  // gunzip the file produced to stdout, capturing to the temp file.
+  // "--" stops option parsing so a file name starting with '-' is still
+  // treated as a file name.
+
+  std::string cmd = "gunzip -c -- ";
+  cmd += ShellQuote(input_gzfile);
+  cmd += " > " + ShellQuote(outputfile);
+
+  int gzout = ::system(cmd.c_str());
+  if (gzout != 0) {
+    gzipstatus = 3;
+  }
+
+  //------------------------------------------------------------------
+  // diff the temp file and the original.
+
+  cmd = "diff -q -- " + ShellQuote(outputfile) + " " +
+        ShellQuote(original_file);
+  int diffout = ::system(cmd.c_str());
+  if (diffout != 0) {
+    gzipstatus = 4;
+  }
+
+  //------------------------------------------------------------------
+  // Cleanup, remove the temp file.
+
+  (void)::remove(outputfile.c_str());
+
+  return gzipstatus;
+#endif
+}
diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/CompareGzip.hpp b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/CompareGzip.hpp
new file mode 100755
index 0000000000..5624b97cea
--- /dev/null
+++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/CompareGzip.hpp
@@ -0,0 +1,41 @@
+// ==============================================================
+// Copyright Intel Corporation
+//
+// SPDX-License-Identifier: MIT
+// =============================================================
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+//
+// This agreement shall be governed in all respects by the laws of the State of
+// California and by the laws of the United States of America.
+ +#ifndef __COMPAREGZIP_H__ +#define __COMPAREGZIP_H__ +#pragma once + +#include +#include + +int CompareGzipFiles( + const std::string + &original_file, // original input file to compare gzip uncompressed + const std::string &input_gzfile); // gzip file to check + +#endif //__COMPAREGZIP_H__ diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/WriteGzip.cpp b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/WriteGzip.cpp new file mode 100755 index 0000000000..71c370aa96 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/WriteGzip.cpp @@ -0,0 +1,163 @@ +// ============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. 
+// +// This agreement shall be governed in all respects by the laws of the State of +// California and by the laws of the United States of America. + +#define _CRT_SECURE_NO_WARNINGS +#include "WriteGzip.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +constexpr int kDeflated = 8; +#define GZIP_MAGIC "\037\213" // Magic header for gzip files, 1F 8B + +#define ORIG_NAME 0x08 +#define OS_CODE 0x03 // Unix OS_CODE + +typedef struct GzipHeader { + unsigned char magic[2]; // 0x1f, 0x8b + unsigned char compress_method; // 0-7 reserved, 8=deflate -- kDeflated + unsigned char flags; // b0: file probably ascii + // b1: header crc-16 present + // b2: extra field present + // b3: original file name present + // b4: file comment present + // b5,6,7: reserved + unsigned long time; // file modification time in Unix format. + // Set this to 0 for now. + + unsigned char extra; // depends on compression method + unsigned char os; // operating system on which compression took place + + // ... + // ? bytes ... compressd data ... + + unsigned long crc; + unsigned long uncompressed_sz; + +} gzip_header, *pgzip_header; + +inline static void PutUlong(uint8_t *pc, unsigned long l) { + pc[0] = l & 0xff; + pc[1] = (l >> 8) & 0xff; + pc[2] = (l >> 16) & 0xff; + pc[3] = (l >> 24) & 0xff; +} + +// returns 0 on success, otherwise failure +int WriteBlockGzip( + std::string &original_filename, // Original file name being compressed + std::string &out_filename, // gzip filename + char *obuf, // pointer to compressed data block + size_t blen, // length of compressed data block + size_t ilen, // original block length + uint32_t buffer_crc) // the block's crc +{ + //------------------------------------------------------------------ + // Setup the gzip output file header. 
+ // header buffer is sized to hold the whole original filename and its NUL + // Method is always DEFLATE + // Original filename is always set in header + // timestamp is set to 0 - ignored by gunzip + // deflate flags set to 0 + // OS code is 3 (Unix) + + size_t filename_sz = original_filename.size() + 1; // name plus its NUL + + unsigned char *pgziphdr = + (unsigned char *)malloc(sizeof(gzip_header) + filename_sz); + + if (!pgziphdr) { + std::cout << "pgzip header cannot be allocated\n"; + return 1; + } + + pgziphdr[0] = GZIP_MAGIC[0]; + pgziphdr[1] = GZIP_MAGIC[1]; + pgziphdr[2] = kDeflated; + pgziphdr[3] = ORIG_NAME; + + // Set time in header to 0, this is ignored by gunzip. + pgziphdr[4] = 0; + pgziphdr[5] = 0; + pgziphdr[6] = 0; + pgziphdr[7] = 0; + + // Deflate flags + pgziphdr[8] = 0; + + // OS code is Linux in this case. + pgziphdr[9] = OS_CODE; + + int ondx = 10; + + const char *p = original_filename.c_str(); + do { + pgziphdr[ondx++] = (*p); + } while (*p++); + + int header_bytes = ondx; + + unsigned char prolog[8]; + + PutUlong(((unsigned char *)prolog), buffer_crc); + PutUlong(((unsigned char *)&prolog[4]), ilen); + + FILE *fo = fopen(out_filename.c_str(), "wb"); // binary: no newline translation + if (!fo) { // fopen() reports failure by returning NULL + std::cout << "Cannot open file for output: " << out_filename << "\n"; + free(pgziphdr); + return 1; + } + + fwrite(pgziphdr, 1, header_bytes, fo); + fwrite(obuf, 1, blen, fo); + fwrite(prolog, 1, 8, fo); + free(pgziphdr); + + // Latch the write status first, then always close the stream so the + // FILE handle is released even when a write failed. + const bool write_failed = ferror(fo) != 0; + if (write_failed) { + std::cout << "gzip output file write failure.\n"; + } + + if (fclose(fo)) { + perror("close"); + return 1; + } + return write_failed ? 1 : 0; +} diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/WriteGzip.hpp b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/WriteGzip.hpp new file mode 100755 index 0000000000..66bc28e315 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/WriteGzip.hpp @@ -0,0 +1,45 @@ +// ============================================================== +// Copyright Intel Corporation +// +//
SPDX-License-Identifier: MIT +// ============================================================= +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// +// This agreement shall be governed in all respects by the laws of the State of +// California and by the laws of the United States of America. 
+ +#ifndef __WRITEGZIP_H__ +#define __WRITEGZIP_H__ +#pragma once + +#include +#include + +// returns 0 on success, otherwise failure +int WriteBlockGzip( + std::string &original_filename, // Original file name being compressed + std::string &out_filename, // gzip filename + char *obuf, // pointer to compressed data block + size_t blen, // length of compressed data block + size_t ilen, // original block length + uint32_t buffer_crc); // the block's crc + +#endif //__WRITEGZIP_H__ diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/build.ninja b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/build.ninja new file mode 100755 index 0000000000..29d50e63a0 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/build.ninja @@ -0,0 +1,32 @@ +device_source_file = gzipkernel.cpp +device_header_file = gzipkernel.h +host_source_file = gzip.cpp crc32.cpp WriteGzip.cpp CompareGzip.cpp +target_name = gzip + +emulator_target = ${target_name}.fpga_emu.exe +report_target = ${target_name}_report.a +report_target_s10_pac = ${target_name}_s10_pac_report.a + +hardware_flags = -fintelfpga -Xshardware -Xsclock=280MHz -Xsparallel=2 -Xsseed=1 +emulator_flags = -fintelfpga -DFPGA_EMULATOR + +rule build_fpga_emu + command = dpcpp /GX ${emulator_flags} ${device_source_file} ${host_source_file} -o $out + +rule gen_report + command = dpcpp /GX ${hardware_flags} -Xsboard=intel_a10gx_pac:pac_a10 ${device_source_file} ${host_source_file} -fsycl-link -o $out + +rule gen_report_s10_pac + command = dpcpp /GX ${hardware_flags} -Xsboard=intel_s10sx_pac:pac_s10 ${device_source_file} ${host_source_file} -fsycl-link -o $out + +# FPGA emulator +build fpga_emu: phony ${emulator_target} +build ${emulator_target}: build_fpga_emu + +# report +build report: phony ${report_target} +build ${report_target}: gen_report + +# report (S10 PAC) +build report_s10_pac: phony ${report_target_s10_pac} +build ${report_target_s10_pac}: gen_report_s10_pac diff --git 
a/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/crc32.cpp b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/crc32.cpp new file mode 100755 index 0000000000..8e6c59c734 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/crc32.cpp @@ -0,0 +1,126 @@ +// ============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// +// This agreement shall be governed in all respects by the laws of the State of +// California and by the laws of the United States of America. 
+ +/* + * Copyright (C) 1995-2006, 2010, 2011, 2012, 2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "crc32.hpp" + +// This table is CRC32s for all single byte values created by using the +// makecrc.c utility from gzip for compatibility with gzip. makecrc.c can be +// found in the gzip source code project found at +// https://git.savannah.gnu.org/git/gzip.git. The polynomial 0xedb88320 is used +// for gzip, and thus used to create this table. +// +// Not copyrighted 1990, Mark Adler. +// +const unsigned int crc32_table[] = { + 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, + 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, + 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, + 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, + 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, + 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, + 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, + 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, + 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, + 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, + 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, + 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, + 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, + 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, + 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, + 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, + 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, + 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, + 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, + 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, + 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, 
+ 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, + 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, + 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, + 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, + 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, + 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, + 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, + 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, + 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, + 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, + 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, + 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, + 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, + 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, + 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, + 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, + 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, + 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, + 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, + 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, + 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, + 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, + 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, + 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, + 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, + 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, + 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, + 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, + 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, + 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 
0x5a05df1bL, + 0x2d02ef8dL}; + +// +// This routine creates a Crc32 from a memory buffer (address, and length), and +// a previous crc. This routine can be called iteratively on different portions +// of the same buffer, using a previously returned crc value. The +// value 0xffffffff is used for the first buffer invocation. +unsigned int Crc32Host( + const char *pbuf, // pointer to the buffer to crc + size_t sz, // number of bytes + unsigned int previous_crc) // previous CRC, allows combining. +{ + unsigned int curr_crc = ~previous_crc; + if (sz) do { + curr_crc = + crc32_table[((int)curr_crc ^ (*pbuf++)) & 0xff] ^ (curr_crc >> 8); + } while (--sz); + return curr_crc ^ 0xffffffffL; +} + +unsigned int Crc32(const char *in, size_t buffer_sz, + unsigned int previous_crc) { + const int num_nibbles_parallel = 64; + const size_t num_sections = + buffer_sz / (num_nibbles_parallel / 2); // how many loop iterations + // now deal with the remainder, this should be done on the software host + // the post-invert also happens inside crc_reference + const char *remaining_data = &in[num_sections * (num_nibbles_parallel / 2)]; // size_t offset: no int overflow for inputs >= 2 GiB + size_t remaining_bytes = buffer_sz % (num_nibbles_parallel / 2); + return Crc32Host(remaining_data, remaining_bytes, previous_crc); +} diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/crc32.hpp b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/crc32.hpp new file mode 100755 index 0000000000..138a8f0754 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/crc32.hpp @@ -0,0 +1,46 @@ +// ============================================================== +// Copyright Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// +// This agreement shall be governed in all respects by the laws of the State of +// California and by the laws of the United States of America. + +#ifndef __CRC32_H__ +#define __CRC32_H__ +#pragma once + +#include +#include + +uint32_t Crc32Host( + const char *pbuf, // pointer to the buffer to crc + size_t sz, // number of bytes + uint32_t previous_crc); // previous CRC, allows combining. First invocation + // would use 0xffffffff. +uint32_t Crc32(const char *pbuf, // pointer to the buffer to crc + size_t sz, // number of bytes + uint32_t previous_crc); // previous CRC, allows combining. First + // invocation would use 0xffffffff. 
+ +#endif //__CRC32_H__ diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/gzip.cpp b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/gzip.cpp new file mode 100755 index 0000000000..9ecfe11728 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/gzip.cpp @@ -0,0 +1,520 @@ +// ============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// +// This agreement shall be governed in all respects by the laws of the State of +// California and by the laws of the United States of America. 
+ +#include +#include +#include +#include +#include + +#include "CompareGzip.hpp" +#include "WriteGzip.hpp" +#include "crc32.hpp" +#include "dpc_common.hpp" +#include "gzipkernel.hpp" +#include "kernels.hpp" + +using namespace sycl; + +// The minimum file size of a file to be compressed. +// Any filesize less than this results in an error. +constexpr int minimum_filesize = kVec + 1; + +bool help = false; + +int CompressFile(queue &q, std::string &input_file, std::vector outfilenames, + int iterations, bool report); + +void Help(void) { + // Command line arguments. + // gzip [options] filetozip [options] + // -h,--help : help + + // future options? + // -p,performance : output perf metrics + // -m,maxmapping=# : maximum mapping size + + std::cout << "gzip filename [options]\n"; + std::cout << " -h,--help : this help text\n"; + std::cout + << " -o=,--output-file= : specify output file\n"; +} + +bool FindGetArg(std::string &arg, const char *str, int defaultval, int *val) { + std::size_t found = arg.find(str, 0, strlen(str)); + if (found == 0) { // option must be a prefix: the value is read right after it + int value = atoi(&arg.c_str()[strlen(str)]); + *val = value; + return true; + } + return false; +} + +constexpr int kMaxStringLen = 40; + +bool FindGetArgString(std::string &arg, const char *str, char *str_value, + size_t maxchars) { + std::size_t found = arg.find(str, 0, strlen(str)); + if (found == 0 && maxchars > 0) { // option must be a prefix; need room for the NUL + const char *sptr = &arg.c_str()[strlen(str)]; + for (size_t i = 0; i < maxchars - 1; i++) { + char ch = sptr[i]; + switch (ch) { + case ' ': + case '\t': + case '\0': + str_value[i] = 0; + return true; + default: + str_value[i] = ch; + break; + } + } + str_value[maxchars - 1] = 0; // value was truncated: still NUL-terminate it + return true; + } + return false; +} + +size_t SyclGetExecTimeNs(event e) { + size_t start_time = + e.get_profiling_info(); + size_t end_time = + e.get_profiling_info(); + return (end_time - start_time); +} + +int main(int argc, char *argv[]) { + std::string infilename = ""; + + std::vector outfilenames (kNumEngines); + + char
str_buffer[kMaxStringLen] = {0}; + + // Check the number of arguments specified + if (argc != 3) { + std::cerr << "Incorrect number of arguments. Correct usage: " << argv[0] + << " -o=\n"; + return 1; + } + + for (int i = 1; i < argc; i++) { + if (argv[i][0] == '-') { + std::string sarg(argv[i]); + if (std::string(argv[i]) == "-h") { + help = true; + } + if (std::string(argv[i]) == "--help") { + help = true; + } + + FindGetArgString(sarg, "-o=", str_buffer, kMaxStringLen); + FindGetArgString(sarg, "--output-file=", str_buffer, kMaxStringLen); + } else { + infilename = std::string(argv[i]); + } + } + + if (help) { + Help(); + return 1; + } + + try { +#ifdef FPGA_EMULATOR + intel::fpga_emulator_selector device_selector; +#else + intel::fpga_selector device_selector; +#endif + auto prop_list = property_list{property::queue::enable_profiling()}; + queue q(device_selector, dpc_common::exception_handler, prop_list); + + std::cout << "Running on device: " + << q.get_device().get_info().c_str() << "\n"; + + if (infilename == "") { + std::cout << "Must specify a filename to compress\n\n"; + Help(); + return 1; + } + + // next, check valid and acceptable parameter ranges. + // if output filename not set, use the default + // name, else use the name specified by the user + outfilenames[0] = std::string(infilename) + ".gz"; + if (strlen(str_buffer)) { + outfilenames[0] = std::string(str_buffer); + } + for (size_t i=1; i< kNumEngines; i++) { + // Filenames will be of the form outfilename, outfilename2, outfilename3 etc. + outfilenames[i] = outfilenames[0] + std::to_string(i+1); + } + + std::cout << "Launching GZIP application with " << kNumEngines + << " engines\n"; + +#ifdef FPGA_EMULATOR + CompressFile(q, infilename, outfilenames, 1, true); +#else + // warmup run - use this run to warmup accelerator. There are some steps in + // the runtime that are only executed on the first kernel invocation but not + // on subsequent invocations. 
So execute all that stuff here before we + // measure performance (in the next call to CompressFile(). + CompressFile(q, infilename, outfilenames, 1, false); + // profile performance + CompressFile(q, infilename, outfilenames, 200, true); +#endif + } catch (sycl::exception const &e) { + // Catches exceptions in the host code + std::cout << "Caught a SYCL host exception:\n" << e.what() << "\n"; + + // Most likely the runtime couldn't find FPGA hardware! + if (e.get_cl_code() == CL_DEVICE_NOT_FOUND) { + std::cout << "If you are targeting an FPGA, please ensure that your " + "system has a correctly configured FPGA board.\n"; + std::cout << "If you are targeting the FPGA emulator, compile with " + "-DFPGA_EMULATOR.\n"; + } + std::terminate(); + } + return 0; +} + +struct KernelInfo { + buffer *gzip_out_buf; + buffer *current_crc; + buffer *pobuf; + buffer *pibuf; + char *pobuf_decompress; + + uint32_t buffer_crc[kMinBufferSize]; + uint32_t refcrc; + + const char *pref_buffer; + char *poutput_buffer; + size_t file_size; + struct GzipOutInfo out_info[kMinBufferSize]; + int iteration; + bool last_block; +}; + +// returns 0 on success, otherwise a non-zero failure code. +int CompressFile(queue &q, std::string &input_file, std::vector outfilenames, + int iterations, bool report) { + size_t isz; + char *pinbuf; + + // Read the input file + std::string device_string = + q.get_device().get_info().c_str(); + bool prepin = + (device_string.find("s10") != + std::string::npos); // Check if "s10" is found in the device string. If + // the device is S10, we pre-pin some buffers to + // improve DMA performance, which is needed to + // achieve peak kernel throughput. Pre-pinning is + // only supported on the PAC-S10 BSP. It's not + // needed on PAC-A10 to achieve peak performance. 
+ + std::ifstream file(input_file, + std::ios::in | std::ios::binary | std::ios::ate); + if (file.is_open()) { + isz = file.tellg(); + if (prepin) { + pinbuf = (char *)malloc_host( + isz, q.get_context()); // Pre-pin the buffer, for faster DMA + } else { // throughput, using malloc_host(). + pinbuf = new char[isz]; + } + file.seekg(0, std::ios::beg); + file.read(pinbuf, isz); + file.close(); + } else { + std::cout << "Error: cannot read specified input file\n"; + return 1; + } + + if (isz < minimum_filesize) { + std::cout << "Minimum filesize for compression is " << minimum_filesize + << "\n"; + return 1; + } + + int buffers_count = iterations; + + // Create an array of kernel info structures and create buffers for kernel + // input/output. The buffers are re-used between iterations, but enough + // disjoint buffers are created to support double-buffering. + struct KernelInfo *kinfo[kNumEngines]; + for (size_t eng = 0; eng < kNumEngines; eng++) { + kinfo[eng] = + (struct KernelInfo *)malloc(sizeof(struct KernelInfo) * buffers_count); + if (kinfo[eng] == NULL) { + std::cout << "Cannot allocate kernel info buffer.\n"; + return 1; + } + for (int i = 0; i < buffers_count; i++) { + kinfo[eng][i].file_size = isz; + // Allocating slightly larger buffers (+ 16 * kVec) to account for + // granularity of kernel writes + int outputSize = kinfo[eng][i].file_size + 16 * kVec < kMinBufferSize + ? kMinBufferSize + : kinfo[eng][i].file_size + 16 * kVec; + + // Pre-pin buffer using malloc_host() to improve DMA bandwidth. 
+ if (i >= 3) { + kinfo[eng][i].poutput_buffer = kinfo[eng][i - 3].poutput_buffer; + } else { + if (prepin) { + kinfo[eng][i].poutput_buffer = + (char *)malloc_host(outputSize, q.get_context()); + } else { + kinfo[eng][i].poutput_buffer = (char *)malloc(outputSize); + } + if (kinfo[eng][i].poutput_buffer == NULL) { + std::cout << "Cannot allocate output buffer.\n"; + free(kinfo[eng]); // kinfo itself is an automatic array; only its malloc'ed elements may be freed + return 1; + } + // zero pages to fully allocate them + memset(kinfo[eng][i].poutput_buffer, 0, outputSize); + } + + kinfo[eng][i].last_block = true; + kinfo[eng][i].iteration = i; + kinfo[eng][i].pref_buffer = pinbuf; + + kinfo[eng][i].gzip_out_buf = + i >= 3 ? kinfo[eng][i - 3].gzip_out_buf + : new buffer(kMinBufferSize); + kinfo[eng][i].current_crc = i >= 3 + ? kinfo[eng][i - 3].current_crc + : new buffer(kMinBufferSize); + kinfo[eng][i].pibuf = i >= 3 + ? kinfo[eng][i - 3].pibuf + : new buffer(kinfo[eng][i].file_size); + kinfo[eng][i].pobuf = + i >= 3 ? kinfo[eng][i - 3].pobuf : new buffer(outputSize); + kinfo[eng][i].pobuf_decompress = (char *)malloc(kinfo[eng][i].file_size); + } + } + + // Create events for the various parts of the execution so that we can profile + // their performance. + event e_input_dma [kNumEngines][buffers_count]; // Input to the GZIP engine. This is a transfer from host to device. + event e_output_dma [kNumEngines][buffers_count]; // Output from the GZIP engine. This is transfer from device to host.
+ event e_crc_dma [kNumEngines][buffers_count]; // Transfer CRC from device to host + event e_size_dma [kNumEngines][buffers_count]; // Transfer compressed file size from device to host + event e_k_crc [kNumEngines][buffers_count]; // CRC kernel + event e_k_lz [kNumEngines][buffers_count]; // LZ77 kernel + event e_k_huff [kNumEngines][buffers_count]; // Huffman Encoding kernel + +#ifndef FPGA_EMULATOR + dpc_common::TimeInterval perf_timer; +#endif + + + /*************************************************/ + /* Main loop where the actual execution happens */ + /*************************************************/ + for (int i = 0; i < buffers_count; i++) { + for (size_t eng = 0; eng < kNumEngines; eng++) { + // Transfer the input data, to be compressed, from host to device. + e_input_dma[eng][i] = q.submit([&](handler &h) { + auto in_data = + kinfo[eng][i].pibuf->get_access(h); + h.copy(kinfo[eng][i].pref_buffer, in_data); + }); + + /************************************/ + /************************************/ + /* LAUNCH GZIP ENGINE */ + /************************************/ + /************************************/ + SubmitGzipTasks(q, kinfo[eng][i].file_size, kinfo[eng][i].pibuf, + kinfo[eng][i].pobuf, kinfo[eng][i].gzip_out_buf, + kinfo[eng][i].current_crc, kinfo[eng][i].last_block, + e_k_crc[eng][i], e_k_lz[eng][i], e_k_huff[eng][i], eng); + + // Transfer the output (compressed) data from device to host. + e_output_dma[eng][i] = q.submit([&](handler &h) { + auto out_data = kinfo[eng][i].pobuf->get_access(h); + h.copy(out_data, kinfo[eng][i].poutput_buffer); + }); + + // Transfer the file size of the compressed output file from device to host. + e_size_dma[eng][i] = q.submit([&](handler &h) { + auto out_data = + kinfo[eng][i].gzip_out_buf->get_access(h); + h.copy(out_data, kinfo[eng][i].out_info); + }); + + // Transfer the CRC of the compressed output file from device to host. 
+ e_crc_dma[eng][i] = q.submit([&](handler &h) { + auto out_data = + kinfo[eng][i].current_crc->get_access(h); + h.copy(out_data, kinfo[eng][i].buffer_crc); + }); + } + } + + // Wait for all kernels to complete + for (int eng = 0; eng < kNumEngines; eng++) { + for (int i = 0; i < buffers_count; i++) { + e_output_dma[eng][i].wait(); + e_size_dma[eng][i].wait(); + e_crc_dma[eng][i].wait(); + } + } + +// Stop the timer. +#ifndef FPGA_EMULATOR + double diff_total = perf_timer.Elapsed(); + double gbps = iterations * isz / (double)diff_total / 1000000000.0; +#endif + + // Check the compressed file size from each iteration. Make sure the size is actually + // less-than-or-equal to the input size. Also calculate the remaining CRC. + size_t compressed_sz[kNumEngines]; + for (int eng = 0; eng < kNumEngines; eng++) { + compressed_sz[eng] = 0; + for (int i = 0; i < buffers_count; i++) { + if (kinfo[eng][i].out_info[0].compression_sz > kinfo[eng][i].file_size) { + std::cerr << "Unsupported: compressed file larger than input file( " + << kinfo[eng][i].out_info[0].compression_sz << " )\n"; + return 1; + } + // The majority of the CRC is calculated by the CRC kernel on the FPGA. But the kernel + // operates on quantized chunks of input data, so any remaining input data, that falls + // outside the quanta, is included in the overall CRC calculation via the following + // function that runs on the host. The last argument is the running CRC that was computed + // on the FPGA. + kinfo[eng][i].buffer_crc[0] = + Crc32(kinfo[eng][i].pref_buffer, kinfo[eng][i].file_size, + kinfo[eng][i].buffer_crc[0]); + // Accumulate the compressed size across all iterations. Used to + // compute compression ratio later. 
+ compressed_sz[eng] += kinfo[eng][i].out_info[0].compression_sz; + } + } + + // delete the file mapping now that all kernels are complete, and we've + // snapped the time delta + if (prepin) { + free(pinbuf, q.get_context()); + } else { + delete[] pinbuf; // allocated with new char[isz], so the array form is required + } + + // Write the output compressed data from the first iteration of each engine, to a file. + for (int eng = 0; eng < kNumEngines; eng++) { + // WriteBlockGzip() returns 1 on failure + if (report && WriteBlockGzip(input_file, outfilenames[eng], kinfo[eng][0].poutput_buffer, + kinfo[eng][0].out_info[0].compression_sz, + kinfo[eng][0].file_size, kinfo[eng][0].buffer_crc[0])) { + std::cout << "FAILED\n"; + return 1; + } + } + + // Decompress the output from engine-0 and compare against the input file. Only engine-0's + // output is verified since all engines are fed the same input data. + if (report && CompareGzipFiles(input_file, outfilenames[0])) { + std::cout << "FAILED\n"; + return 1; + } + + // Generate throughput report + // First gather all the execution times.
+ size_t time_k_crc[kNumEngines]; + size_t time_k_lz[kNumEngines]; + size_t time_k_huff[kNumEngines]; + size_t time_input_dma[kNumEngines]; + size_t time_output_dma[kNumEngines]; + for (int eng = 0; eng < kNumEngines; eng++) { + time_k_crc[eng] = 0; + time_k_lz[eng] = 0; + time_k_huff[eng] = 0; + time_input_dma[eng] = 0; + time_output_dma[eng] = 0; + for (int i = 0; i < buffers_count; i++) { + e_k_crc[eng][i].wait(); + e_k_lz[eng][i].wait(); + e_k_huff[eng][i].wait(); + time_k_crc[eng] += SyclGetExecTimeNs(e_k_crc[eng][i]); + time_k_lz[eng] += SyclGetExecTimeNs(e_k_lz[eng][i]); + time_k_huff[eng] += SyclGetExecTimeNs(e_k_huff[eng][i]); + time_input_dma[eng] += SyclGetExecTimeNs(e_input_dma[eng][i]); + time_output_dma[eng] += SyclGetExecTimeNs(e_output_dma[eng][i]); + } + } + + if (report) { + double compression_ratio = + (double)((double)compressed_sz[0] / (double)isz / iterations); +#ifndef FPGA_EMULATOR + std::cout << "Throughput: " << kNumEngines * gbps << " GB/s\n\n"; + for (int eng = 0; eng < kNumEngines; eng++) { + std::cout << "TP breakdown for engine #" << eng << " (GB/s)\n"; + std::cout << "CRC = " << iterations * isz / (double)time_k_crc[eng] + << "\n"; + std::cout << "LZ77 = " << iterations * isz / (double)time_k_lz[eng] + << "\n"; + std::cout << "Huffman Encoding = " + << iterations * isz / (double)time_k_huff[eng] << "\n"; + std::cout << "DMA host-to-device = " + << iterations * isz / (double)time_input_dma[eng] << "\n"; + std::cout << "DMA device-to-host = " + << iterations * isz / (double)time_output_dma[eng] << "\n\n"; + } +#endif + std::cout << "Compression Ratio " << compression_ratio * 100 << "%\n"; + } + + // Cleanup anything that was allocated by this routine. 
+ for (int eng = 0; eng < kNumEngines; eng++) { + for (int i = 0; i < buffers_count; i++) { + if (i < 3) { + delete kinfo[eng][i].gzip_out_buf; + delete kinfo[eng][i].current_crc; + delete kinfo[eng][i].pibuf; + delete kinfo[eng][i].pobuf; + if (prepin) { + free(kinfo[eng][i].poutput_buffer, q.get_context()); + } else { + free(kinfo[eng][i].poutput_buffer); + } + } + free(kinfo[eng][i].pobuf_decompress); + } + free(kinfo[eng]); + } + + if (report) std::cout << "PASSED\n"; + return 0; +} diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/gzipkernel.cpp b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/gzipkernel.cpp new file mode 100755 index 0000000000..01d69c1f9b --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/gzipkernel.cpp @@ -0,0 +1,2406 @@ +// ============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// +// This agreement shall be governed in all respects by the laws of the State of +// California and by the laws of the United States of America. + +/* + * Copyright (C) 1995-2006, 2010, 2011, 2012, 2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include + +#include "gzipkernel.hpp" +#include "kernels.hpp" + +using namespace sycl; + +// This reference design uses a template-based unroller. It's also possible +// to specify this in a more concise way using a pragma. See the loop unroll +// tutorial for more information. +template +struct Unroller { + template + static void step(const Action &action) { + action(Begin); + Unroller::step(action); + } +}; + +template +struct Unroller { + template + static void step(const Action &action) {} +}; + +int GetHuffLiteralBits(unsigned char ch) { + CtData static_ltree[kLCodes + 2] = { + {12, 8}, {140, 8}, {76, 8}, {204, 8}, {44, 8}, {172, 8}, {108, 8}, + {236, 8}, {28, 8}, {156, 8}, {92, 8}, {220, 8}, {60, 8}, {188, 8}, + {124, 8}, {252, 8}, {2, 8}, {130, 8}, {66, 8}, {194, 8}, {34, 8}, + {162, 8}, {98, 8}, {226, 8}, {18, 8}, {146, 8}, {82, 8}, {210, 8}, + {50, 8}, {178, 8}, {114, 8}, {242, 8}, {10, 8}, {138, 8}, {74, 8}, + {202, 8}, {42, 8}, {170, 8}, {106, 8}, {234, 8}, {26, 8}, {154, 8}, + {90, 8}, {218, 8}, {58, 8}, {186, 8}, {122, 8}, {250, 8}, {6, 8}, + {134, 8}, {70, 8}, {198, 8}, {38, 8}, {166, 8}, {102, 8}, {230, 8}, + {22, 8}, {150, 8}, {86, 8}, {214, 8}, {54, 8}, {182, 8}, {118, 8}, + {246, 8}, {14, 8}, {142, 8}, {78, 8}, {206, 8}, {46, 8}, {174, 8}, + {110, 8}, {238, 8}, {30, 8}, {158, 8}, {94, 8}, {222, 8}, {62, 8}, + {190, 8}, {126, 8}, {254, 8}, {1, 8}, {129, 8}, {65, 8}, {193, 8}, + {33, 
8}, {161, 8}, {97, 8}, {225, 8}, {17, 8}, {145, 8}, {81, 8}, + {209, 8}, {49, 8}, {177, 8}, {113, 8}, {241, 8}, {9, 8}, {137, 8}, + {73, 8}, {201, 8}, {41, 8}, {169, 8}, {105, 8}, {233, 8}, {25, 8}, + {153, 8}, {89, 8}, {217, 8}, {57, 8}, {185, 8}, {121, 8}, {249, 8}, + {5, 8}, {133, 8}, {69, 8}, {197, 8}, {37, 8}, {165, 8}, {101, 8}, + {229, 8}, {21, 8}, {149, 8}, {85, 8}, {213, 8}, {53, 8}, {181, 8}, + {117, 8}, {245, 8}, {13, 8}, {141, 8}, {77, 8}, {205, 8}, {45, 8}, + {173, 8}, {109, 8}, {237, 8}, {29, 8}, {157, 8}, {93, 8}, {221, 8}, + {61, 8}, {189, 8}, {125, 8}, {253, 8}, {19, 9}, {275, 9}, {147, 9}, + {403, 9}, {83, 9}, {339, 9}, {211, 9}, {467, 9}, {51, 9}, {307, 9}, + {179, 9}, {435, 9}, {115, 9}, {371, 9}, {243, 9}, {499, 9}, {11, 9}, + {267, 9}, {139, 9}, {395, 9}, {75, 9}, {331, 9}, {203, 9}, {459, 9}, + {43, 9}, {299, 9}, {171, 9}, {427, 9}, {107, 9}, {363, 9}, {235, 9}, + {491, 9}, {27, 9}, {283, 9}, {155, 9}, {411, 9}, {91, 9}, {347, 9}, + {219, 9}, {475, 9}, {59, 9}, {315, 9}, {187, 9}, {443, 9}, {123, 9}, + {379, 9}, {251, 9}, {507, 9}, {7, 9}, {263, 9}, {135, 9}, {391, 9}, + {71, 9}, {327, 9}, {199, 9}, {455, 9}, {39, 9}, {295, 9}, {167, 9}, + {423, 9}, {103, 9}, {359, 9}, {231, 9}, {487, 9}, {23, 9}, {279, 9}, + {151, 9}, {407, 9}, {87, 9}, {343, 9}, {215, 9}, {471, 9}, {55, 9}, + {311, 9}, {183, 9}, {439, 9}, {119, 9}, {375, 9}, {247, 9}, {503, 9}, + {15, 9}, {271, 9}, {143, 9}, {399, 9}, {79, 9}, {335, 9}, {207, 9}, + {463, 9}, {47, 9}, {303, 9}, {175, 9}, {431, 9}, {111, 9}, {367, 9}, + {239, 9}, {495, 9}, {31, 9}, {287, 9}, {159, 9}, {415, 9}, {95, 9}, + {351, 9}, {223, 9}, {479, 9}, {63, 9}, {319, 9}, {191, 9}, {447, 9}, + {127, 9}, {383, 9}, {255, 9}, {511, 9}, {0, 7}, {64, 7}, {32, 7}, + {96, 7}, {16, 7}, {80, 7}, {48, 7}, {112, 7}, {8, 7}, {72, 7}, + {40, 7}, {104, 7}, {24, 7}, {88, 7}, {56, 7}, {120, 7}, {4, 7}, + {68, 7}, {36, 7}, {100, 7}, {20, 7}, {84, 7}, {52, 7}, {116, 7}, + {3, 8}, {131, 8}, {67, 8}, {195, 8}, {35, 8}, {163, 8}, 
{99, 8}, + {227, 8}, + }; + return static_ltree[ch].code; +} + +int GetHuffLiteralLen(unsigned char ch) { + CtData static_ltree[kLCodes + 2] = { + {12, 8}, {140, 8}, {76, 8}, {204, 8}, {44, 8}, {172, 8}, {108, 8}, + {236, 8}, {28, 8}, {156, 8}, {92, 8}, {220, 8}, {60, 8}, {188, 8}, + {124, 8}, {252, 8}, {2, 8}, {130, 8}, {66, 8}, {194, 8}, {34, 8}, + {162, 8}, {98, 8}, {226, 8}, {18, 8}, {146, 8}, {82, 8}, {210, 8}, + {50, 8}, {178, 8}, {114, 8}, {242, 8}, {10, 8}, {138, 8}, {74, 8}, + {202, 8}, {42, 8}, {170, 8}, {106, 8}, {234, 8}, {26, 8}, {154, 8}, + {90, 8}, {218, 8}, {58, 8}, {186, 8}, {122, 8}, {250, 8}, {6, 8}, + {134, 8}, {70, 8}, {198, 8}, {38, 8}, {166, 8}, {102, 8}, {230, 8}, + {22, 8}, {150, 8}, {86, 8}, {214, 8}, {54, 8}, {182, 8}, {118, 8}, + {246, 8}, {14, 8}, {142, 8}, {78, 8}, {206, 8}, {46, 8}, {174, 8}, + {110, 8}, {238, 8}, {30, 8}, {158, 8}, {94, 8}, {222, 8}, {62, 8}, + {190, 8}, {126, 8}, {254, 8}, {1, 8}, {129, 8}, {65, 8}, {193, 8}, + {33, 8}, {161, 8}, {97, 8}, {225, 8}, {17, 8}, {145, 8}, {81, 8}, + {209, 8}, {49, 8}, {177, 8}, {113, 8}, {241, 8}, {9, 8}, {137, 8}, + {73, 8}, {201, 8}, {41, 8}, {169, 8}, {105, 8}, {233, 8}, {25, 8}, + {153, 8}, {89, 8}, {217, 8}, {57, 8}, {185, 8}, {121, 8}, {249, 8}, + {5, 8}, {133, 8}, {69, 8}, {197, 8}, {37, 8}, {165, 8}, {101, 8}, + {229, 8}, {21, 8}, {149, 8}, {85, 8}, {213, 8}, {53, 8}, {181, 8}, + {117, 8}, {245, 8}, {13, 8}, {141, 8}, {77, 8}, {205, 8}, {45, 8}, + {173, 8}, {109, 8}, {237, 8}, {29, 8}, {157, 8}, {93, 8}, {221, 8}, + {61, 8}, {189, 8}, {125, 8}, {253, 8}, {19, 9}, {275, 9}, {147, 9}, + {403, 9}, {83, 9}, {339, 9}, {211, 9}, {467, 9}, {51, 9}, {307, 9}, + {179, 9}, {435, 9}, {115, 9}, {371, 9}, {243, 9}, {499, 9}, {11, 9}, + {267, 9}, {139, 9}, {395, 9}, {75, 9}, {331, 9}, {203, 9}, {459, 9}, + {43, 9}, {299, 9}, {171, 9}, {427, 9}, {107, 9}, {363, 9}, {235, 9}, + {491, 9}, {27, 9}, {283, 9}, {155, 9}, {411, 9}, {91, 9}, {347, 9}, + {219, 9}, {475, 9}, {59, 9}, {315, 9}, {187, 9}, 
{443, 9}, {123, 9}, + {379, 9}, {251, 9}, {507, 9}, {7, 9}, {263, 9}, {135, 9}, {391, 9}, + {71, 9}, {327, 9}, {199, 9}, {455, 9}, {39, 9}, {295, 9}, {167, 9}, + {423, 9}, {103, 9}, {359, 9}, {231, 9}, {487, 9}, {23, 9}, {279, 9}, + {151, 9}, {407, 9}, {87, 9}, {343, 9}, {215, 9}, {471, 9}, {55, 9}, + {311, 9}, {183, 9}, {439, 9}, {119, 9}, {375, 9}, {247, 9}, {503, 9}, + {15, 9}, {271, 9}, {143, 9}, {399, 9}, {79, 9}, {335, 9}, {207, 9}, + {463, 9}, {47, 9}, {303, 9}, {175, 9}, {431, 9}, {111, 9}, {367, 9}, + {239, 9}, {495, 9}, {31, 9}, {287, 9}, {159, 9}, {415, 9}, {95, 9}, + {351, 9}, {223, 9}, {479, 9}, {63, 9}, {319, 9}, {191, 9}, {447, 9}, + {127, 9}, {383, 9}, {255, 9}, {511, 9}, {0, 7}, {64, 7}, {32, 7}, + {96, 7}, {16, 7}, {80, 7}, {48, 7}, {112, 7}, {8, 7}, {72, 7}, + {40, 7}, {104, 7}, {24, 7}, {88, 7}, {56, 7}, {120, 7}, {4, 7}, + {68, 7}, {36, 7}, {100, 7}, {20, 7}, {84, 7}, {52, 7}, {116, 7}, + {3, 8}, {131, 8}, {67, 8}, {195, 8}, {35, 8}, {163, 8}, {99, 8}, + {227, 8}, + }; + return static_ltree[ch].len; +} + +int GetHuffRunLen(int len, int initial_dist) { + int lc; + unsigned code; + int extra; + int dist; + int local_lbits, local_llen; + int local_dbits, local_dlen; + local_lbits = 0; + local_llen = 0; + + int base_length[kLengthCodes] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, + 28, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 0, + }; + + int extra_lbits[kLengthCodes] // extra bits for each length code + = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, + 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0}; + + CtData static_ltree[kLCodes + 2] = { + {12, 8}, {140, 8}, {76, 8}, {204, 8}, {44, 8}, {172, 8}, {108, 8}, + {236, 8}, {28, 8}, {156, 8}, {92, 8}, {220, 8}, {60, 8}, {188, 8}, + {124, 8}, {252, 8}, {2, 8}, {130, 8}, {66, 8}, {194, 8}, {34, 8}, + {162, 8}, {98, 8}, {226, 8}, {18, 8}, {146, 8}, {82, 8}, {210, 8}, + {50, 8}, {178, 8}, {114, 8}, {242, 8}, {10, 8}, {138, 8}, {74, 8}, + {202, 8}, {42, 8}, {170, 8}, {106, 8}, {234, 8}, 
{26, 8}, {154, 8}, + {90, 8}, {218, 8}, {58, 8}, {186, 8}, {122, 8}, {250, 8}, {6, 8}, + {134, 8}, {70, 8}, {198, 8}, {38, 8}, {166, 8}, {102, 8}, {230, 8}, + {22, 8}, {150, 8}, {86, 8}, {214, 8}, {54, 8}, {182, 8}, {118, 8}, + {246, 8}, {14, 8}, {142, 8}, {78, 8}, {206, 8}, {46, 8}, {174, 8}, + {110, 8}, {238, 8}, {30, 8}, {158, 8}, {94, 8}, {222, 8}, {62, 8}, + {190, 8}, {126, 8}, {254, 8}, {1, 8}, {129, 8}, {65, 8}, {193, 8}, + {33, 8}, {161, 8}, {97, 8}, {225, 8}, {17, 8}, {145, 8}, {81, 8}, + {209, 8}, {49, 8}, {177, 8}, {113, 8}, {241, 8}, {9, 8}, {137, 8}, + {73, 8}, {201, 8}, {41, 8}, {169, 8}, {105, 8}, {233, 8}, {25, 8}, + {153, 8}, {89, 8}, {217, 8}, {57, 8}, {185, 8}, {121, 8}, {249, 8}, + {5, 8}, {133, 8}, {69, 8}, {197, 8}, {37, 8}, {165, 8}, {101, 8}, + {229, 8}, {21, 8}, {149, 8}, {85, 8}, {213, 8}, {53, 8}, {181, 8}, + {117, 8}, {245, 8}, {13, 8}, {141, 8}, {77, 8}, {205, 8}, {45, 8}, + {173, 8}, {109, 8}, {237, 8}, {29, 8}, {157, 8}, {93, 8}, {221, 8}, + {61, 8}, {189, 8}, {125, 8}, {253, 8}, {19, 9}, {275, 9}, {147, 9}, + {403, 9}, {83, 9}, {339, 9}, {211, 9}, {467, 9}, {51, 9}, {307, 9}, + {179, 9}, {435, 9}, {115, 9}, {371, 9}, {243, 9}, {499, 9}, {11, 9}, + {267, 9}, {139, 9}, {395, 9}, {75, 9}, {331, 9}, {203, 9}, {459, 9}, + {43, 9}, {299, 9}, {171, 9}, {427, 9}, {107, 9}, {363, 9}, {235, 9}, + {491, 9}, {27, 9}, {283, 9}, {155, 9}, {411, 9}, {91, 9}, {347, 9}, + {219, 9}, {475, 9}, {59, 9}, {315, 9}, {187, 9}, {443, 9}, {123, 9}, + {379, 9}, {251, 9}, {507, 9}, {7, 9}, {263, 9}, {135, 9}, {391, 9}, + {71, 9}, {327, 9}, {199, 9}, {455, 9}, {39, 9}, {295, 9}, {167, 9}, + {423, 9}, {103, 9}, {359, 9}, {231, 9}, {487, 9}, {23, 9}, {279, 9}, + {151, 9}, {407, 9}, {87, 9}, {343, 9}, {215, 9}, {471, 9}, {55, 9}, + {311, 9}, {183, 9}, {439, 9}, {119, 9}, {375, 9}, {247, 9}, {503, 9}, + {15, 9}, {271, 9}, {143, 9}, {399, 9}, {79, 9}, {335, 9}, {207, 9}, + {463, 9}, {47, 9}, {303, 9}, {175, 9}, {431, 9}, {111, 9}, {367, 9}, + {239, 9}, {495, 9}, {31, 
9}, {287, 9}, {159, 9}, {415, 9}, {95, 9}, + {351, 9}, {223, 9}, {479, 9}, {63, 9}, {319, 9}, {191, 9}, {447, 9}, + {127, 9}, {383, 9}, {255, 9}, {511, 9}, {0, 7}, {64, 7}, {32, 7}, + {96, 7}, {16, 7}, {80, 7}, {48, 7}, {112, 7}, {8, 7}, {72, 7}, + {40, 7}, {104, 7}, {24, 7}, {88, 7}, {56, 7}, {120, 7}, {4, 7}, + {68, 7}, {36, 7}, {100, 7}, {20, 7}, {84, 7}, {52, 7}, {116, 7}, + {3, 8}, {131, 8}, {67, 8}, {195, 8}, {35, 8}, {163, 8}, {99, 8}, + {227, 8}, + }; + + // distance codes. The first 256 values correspond to the distances + // 3 .. 258, the last 256 values correspond to the top 8 bits of + // the 15 bit distances. + unsigned char dist_code[512] = { + 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, + 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 0, 0, 16, 17, 18, 18, 19, 19, 20, 20, 20, 20, 21, 21, + 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 
26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, + 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, + 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, + 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, + 29, 29, 29, 29, 29, 29, 29, 29, + }; + // length code for each normalized match length (0 == kMinMatch) + unsigned char length_code[kMaxMatch - kMinMatch + 1] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, + 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, + 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, + 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 28, + }; + + int extra_dbits[kDCodes] // extra bits for each distance code + = {0, 0, 0, 
0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, + 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13}; + + int base_dist[kDCodes] = { + 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, + 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, + 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, + }; + + CtData static_dtree[kDCodes] = { + {0, 5}, {16, 5}, {8, 5}, {24, 5}, {4, 5}, {20, 5}, {12, 5}, {28, 5}, + {2, 5}, {18, 5}, {10, 5}, {26, 5}, {6, 5}, {22, 5}, {14, 5}, {30, 5}, + {1, 5}, {17, 5}, {9, 5}, {25, 5}, {5, 5}, {21, 5}, {13, 5}, {29, 5}, + {3, 5}, {19, 5}, {11, 5}, {27, 5}, {7, 5}, {23, 5}, + }; + + lc = len - kMinMatch; + code = length_code[lc]; + + local_lbits = static_ltree[code + kLiterals + 1].code; + local_llen = static_ltree[code + kLiterals + 1].len; + extra = extra_lbits[code]; + if (extra) { + lc -= base_length[code]; + local_lbits |= lc << local_llen; + local_llen += extra; + } + + dist = initial_dist; + dist--; + code = d_code(dist); + local_dbits = static_dtree[code].code; + local_dlen = static_dtree[code].len; + extra = extra_dbits[code]; + if (extra) { + dist -= base_dist[code]; + local_dbits |= dist << local_dlen; + local_dlen += extra; + } + + local_lbits |= local_dbits << local_llen; + local_llen += local_dlen; + + return local_llen; +} + +int GetHuffRunBits(int len, int initial_dist) { + int lc; + unsigned code; + int extra; + int dist; + int local_lbits, local_llen; + int local_dbits, local_dlen; + local_lbits = 0; + local_llen = 0; + + int base_length[kLengthCodes] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, + 28, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 0, + }; + + int extra_lbits[kLengthCodes] // extra bits for each length code + = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, + 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0}; + + CtData static_ltree[kLCodes + 2] = { + {12, 8}, {140, 8}, {76, 8}, {204, 8}, {44, 8}, {172, 8}, {108, 8}, + {236, 8}, {28, 8}, {156, 8}, {92, 8}, {220, 8}, {60, 8}, {188, 8}, + {124, 8}, {252, 8}, {2, 8}, {130, 
8}, {66, 8}, {194, 8}, {34, 8}, + {162, 8}, {98, 8}, {226, 8}, {18, 8}, {146, 8}, {82, 8}, {210, 8}, + {50, 8}, {178, 8}, {114, 8}, {242, 8}, {10, 8}, {138, 8}, {74, 8}, + {202, 8}, {42, 8}, {170, 8}, {106, 8}, {234, 8}, {26, 8}, {154, 8}, + {90, 8}, {218, 8}, {58, 8}, {186, 8}, {122, 8}, {250, 8}, {6, 8}, + {134, 8}, {70, 8}, {198, 8}, {38, 8}, {166, 8}, {102, 8}, {230, 8}, + {22, 8}, {150, 8}, {86, 8}, {214, 8}, {54, 8}, {182, 8}, {118, 8}, + {246, 8}, {14, 8}, {142, 8}, {78, 8}, {206, 8}, {46, 8}, {174, 8}, + {110, 8}, {238, 8}, {30, 8}, {158, 8}, {94, 8}, {222, 8}, {62, 8}, + {190, 8}, {126, 8}, {254, 8}, {1, 8}, {129, 8}, {65, 8}, {193, 8}, + {33, 8}, {161, 8}, {97, 8}, {225, 8}, {17, 8}, {145, 8}, {81, 8}, + {209, 8}, {49, 8}, {177, 8}, {113, 8}, {241, 8}, {9, 8}, {137, 8}, + {73, 8}, {201, 8}, {41, 8}, {169, 8}, {105, 8}, {233, 8}, {25, 8}, + {153, 8}, {89, 8}, {217, 8}, {57, 8}, {185, 8}, {121, 8}, {249, 8}, + {5, 8}, {133, 8}, {69, 8}, {197, 8}, {37, 8}, {165, 8}, {101, 8}, + {229, 8}, {21, 8}, {149, 8}, {85, 8}, {213, 8}, {53, 8}, {181, 8}, + {117, 8}, {245, 8}, {13, 8}, {141, 8}, {77, 8}, {205, 8}, {45, 8}, + {173, 8}, {109, 8}, {237, 8}, {29, 8}, {157, 8}, {93, 8}, {221, 8}, + {61, 8}, {189, 8}, {125, 8}, {253, 8}, {19, 9}, {275, 9}, {147, 9}, + {403, 9}, {83, 9}, {339, 9}, {211, 9}, {467, 9}, {51, 9}, {307, 9}, + {179, 9}, {435, 9}, {115, 9}, {371, 9}, {243, 9}, {499, 9}, {11, 9}, + {267, 9}, {139, 9}, {395, 9}, {75, 9}, {331, 9}, {203, 9}, {459, 9}, + {43, 9}, {299, 9}, {171, 9}, {427, 9}, {107, 9}, {363, 9}, {235, 9}, + {491, 9}, {27, 9}, {283, 9}, {155, 9}, {411, 9}, {91, 9}, {347, 9}, + {219, 9}, {475, 9}, {59, 9}, {315, 9}, {187, 9}, {443, 9}, {123, 9}, + {379, 9}, {251, 9}, {507, 9}, {7, 9}, {263, 9}, {135, 9}, {391, 9}, + {71, 9}, {327, 9}, {199, 9}, {455, 9}, {39, 9}, {295, 9}, {167, 9}, + {423, 9}, {103, 9}, {359, 9}, {231, 9}, {487, 9}, {23, 9}, {279, 9}, + {151, 9}, {407, 9}, {87, 9}, {343, 9}, {215, 9}, {471, 9}, {55, 9}, + {311, 9}, {183, 
9}, {439, 9}, {119, 9}, {375, 9}, {247, 9}, {503, 9}, + {15, 9}, {271, 9}, {143, 9}, {399, 9}, {79, 9}, {335, 9}, {207, 9}, + {463, 9}, {47, 9}, {303, 9}, {175, 9}, {431, 9}, {111, 9}, {367, 9}, + {239, 9}, {495, 9}, {31, 9}, {287, 9}, {159, 9}, {415, 9}, {95, 9}, + {351, 9}, {223, 9}, {479, 9}, {63, 9}, {319, 9}, {191, 9}, {447, 9}, + {127, 9}, {383, 9}, {255, 9}, {511, 9}, {0, 7}, {64, 7}, {32, 7}, + {96, 7}, {16, 7}, {80, 7}, {48, 7}, {112, 7}, {8, 7}, {72, 7}, + {40, 7}, {104, 7}, {24, 7}, {88, 7}, {56, 7}, {120, 7}, {4, 7}, + {68, 7}, {36, 7}, {100, 7}, {20, 7}, {84, 7}, {52, 7}, {116, 7}, + {3, 8}, {131, 8}, {67, 8}, {195, 8}, {35, 8}, {163, 8}, {99, 8}, + {227, 8}, + }; + + // distance codes. The first 256 values correspond to the distances + // 3 .. 258, the last 256 values correspond to the top 8 bits of + // the 15 bit distances. + unsigned char dist_code[512] = { + 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, + 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 0, 0, 16, 17, 18, 18, 19, 19, 20, 20, 20, 20, 21, 21, + 21, 21, 22, 22, 22, 22, 22, 22, 
22, 22, 23, 23, 23, 23, 23, 23, 23, 23, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, + 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, + 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, + 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, + 29, 29, 29, 29, 29, 29, 29, 29, + }; + // length code for each normalized match length (0 == kMinMatch) + unsigned char length_code[kMaxMatch - kMinMatch + 1] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, + 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, + 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, + 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 
26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 28, + }; + + int extra_dbits[kDCodes] // extra bits for each distance code + = {0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, + 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13}; + + int base_dist[kDCodes] = { + 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, + 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, + 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, + }; + + CtData static_dtree[kDCodes] = { + {0, 5}, {16, 5}, {8, 5}, {24, 5}, {4, 5}, {20, 5}, {12, 5}, {28, 5}, + {2, 5}, {18, 5}, {10, 5}, {26, 5}, {6, 5}, {22, 5}, {14, 5}, {30, 5}, + {1, 5}, {17, 5}, {9, 5}, {25, 5}, {5, 5}, {21, 5}, {13, 5}, {29, 5}, + {3, 5}, {19, 5}, {11, 5}, {27, 5}, {7, 5}, {23, 5}, + }; + + lc = len - kMinMatch; + code = length_code[lc]; + + local_lbits = static_ltree[code + kLiterals + 1].code; + local_llen = static_ltree[code + kLiterals + 1].len; + extra = extra_lbits[code]; + if (extra) { + lc -= base_length[code]; + local_lbits |= lc << local_llen; + local_llen += extra; + } + + dist = initial_dist; + dist--; + code = d_code(dist); + local_dbits = static_dtree[code].code; + local_dlen = static_dtree[code].len; + extra = extra_dbits[code]; + if (extra) { + dist -= base_dist[code]; + local_dbits |= dist << local_dlen; + local_dlen += extra; + } + + local_lbits |= local_dbits << local_llen; + local_llen += local_dlen; + + return local_lbits; +} + +int GetHuffLen(int len, int dist, unsigned char ch) { + int returned_len; + + CtData static_ltree[kLCodes + 2] = { + {12, 8}, {140, 8}, {76, 8}, {204, 8}, {44, 8}, {172, 8}, {108, 8}, + {236, 8}, {28, 8}, {156, 8}, {92, 8}, {220, 8}, {60, 8}, {188, 8}, + {124, 8}, {252, 8}, {2, 8}, {130, 8}, {66, 8}, {194, 8}, {34, 8}, + {162, 8}, {98, 8}, {226, 8}, {18, 8}, {146, 8}, {82, 8}, {210, 8}, + {50, 8}, {178, 8}, {114, 8}, {242, 8}, {10, 8}, {138, 8}, {74, 8}, + {202, 8}, {42, 8}, {170, 8}, {106, 8}, {234, 8}, {26, 
8}, {154, 8}, + {90, 8}, {218, 8}, {58, 8}, {186, 8}, {122, 8}, {250, 8}, {6, 8}, + {134, 8}, {70, 8}, {198, 8}, {38, 8}, {166, 8}, {102, 8}, {230, 8}, + {22, 8}, {150, 8}, {86, 8}, {214, 8}, {54, 8}, {182, 8}, {118, 8}, + {246, 8}, {14, 8}, {142, 8}, {78, 8}, {206, 8}, {46, 8}, {174, 8}, + {110, 8}, {238, 8}, {30, 8}, {158, 8}, {94, 8}, {222, 8}, {62, 8}, + {190, 8}, {126, 8}, {254, 8}, {1, 8}, {129, 8}, {65, 8}, {193, 8}, + {33, 8}, {161, 8}, {97, 8}, {225, 8}, {17, 8}, {145, 8}, {81, 8}, + {209, 8}, {49, 8}, {177, 8}, {113, 8}, {241, 8}, {9, 8}, {137, 8}, + {73, 8}, {201, 8}, {41, 8}, {169, 8}, {105, 8}, {233, 8}, {25, 8}, + {153, 8}, {89, 8}, {217, 8}, {57, 8}, {185, 8}, {121, 8}, {249, 8}, + {5, 8}, {133, 8}, {69, 8}, {197, 8}, {37, 8}, {165, 8}, {101, 8}, + {229, 8}, {21, 8}, {149, 8}, {85, 8}, {213, 8}, {53, 8}, {181, 8}, + {117, 8}, {245, 8}, {13, 8}, {141, 8}, {77, 8}, {205, 8}, {45, 8}, + {173, 8}, {109, 8}, {237, 8}, {29, 8}, {157, 8}, {93, 8}, {221, 8}, + {61, 8}, {189, 8}, {125, 8}, {253, 8}, {19, 9}, {275, 9}, {147, 9}, + {403, 9}, {83, 9}, {339, 9}, {211, 9}, {467, 9}, {51, 9}, {307, 9}, + {179, 9}, {435, 9}, {115, 9}, {371, 9}, {243, 9}, {499, 9}, {11, 9}, + {267, 9}, {139, 9}, {395, 9}, {75, 9}, {331, 9}, {203, 9}, {459, 9}, + {43, 9}, {299, 9}, {171, 9}, {427, 9}, {107, 9}, {363, 9}, {235, 9}, + {491, 9}, {27, 9}, {283, 9}, {155, 9}, {411, 9}, {91, 9}, {347, 9}, + {219, 9}, {475, 9}, {59, 9}, {315, 9}, {187, 9}, {443, 9}, {123, 9}, + {379, 9}, {251, 9}, {507, 9}, {7, 9}, {263, 9}, {135, 9}, {391, 9}, + {71, 9}, {327, 9}, {199, 9}, {455, 9}, {39, 9}, {295, 9}, {167, 9}, + {423, 9}, {103, 9}, {359, 9}, {231, 9}, {487, 9}, {23, 9}, {279, 9}, + {151, 9}, {407, 9}, {87, 9}, {343, 9}, {215, 9}, {471, 9}, {55, 9}, + {311, 9}, {183, 9}, {439, 9}, {119, 9}, {375, 9}, {247, 9}, {503, 9}, + {15, 9}, {271, 9}, {143, 9}, {399, 9}, {79, 9}, {335, 9}, {207, 9}, + {463, 9}, {47, 9}, {303, 9}, {175, 9}, {431, 9}, {111, 9}, {367, 9}, + {239, 9}, {495, 9}, {31, 9}, 
{287, 9}, {159, 9}, {415, 9}, {95, 9}, + {351, 9}, {223, 9}, {479, 9}, {63, 9}, {319, 9}, {191, 9}, {447, 9}, + {127, 9}, {383, 9}, {255, 9}, {511, 9}, {0, 7}, {64, 7}, {32, 7}, + {96, 7}, {16, 7}, {80, 7}, {48, 7}, {112, 7}, {8, 7}, {72, 7}, + {40, 7}, {104, 7}, {24, 7}, {88, 7}, {56, 7}, {120, 7}, {4, 7}, + {68, 7}, {36, 7}, {100, 7}, {20, 7}, {84, 7}, {52, 7}, {116, 7}, + {3, 8}, {131, 8}, {67, 8}, {195, 8}, {35, 8}, {163, 8}, {99, 8}, + {227, 8}, + }; + switch (len) { + case -3: + returned_len = static_ltree[kEndBlock].len; + break; + case -2: + returned_len = 3; + break; + case -1: + returned_len = 0; + break; + case 0: + returned_len = GetHuffLiteralLen(ch); + break; + default: + returned_len = GetHuffRunLen(len, dist); + break; + } + return returned_len; +} + +int IsValid(int len, int dist, unsigned char ch) { + switch (len) { + case -3: + return 1; + case -2: + return 1; + case -1: + return 0; + case 0: + return 1; + default: + return 1; + } +} + +int GetHuffBits(int len, int dist, unsigned char ch) { + int bits; + CtData static_ltree[kLCodes + 2] = { + {12, 8}, {140, 8}, {76, 8}, {204, 8}, {44, 8}, {172, 8}, {108, 8}, + {236, 8}, {28, 8}, {156, 8}, {92, 8}, {220, 8}, {60, 8}, {188, 8}, + {124, 8}, {252, 8}, {2, 8}, {130, 8}, {66, 8}, {194, 8}, {34, 8}, + {162, 8}, {98, 8}, {226, 8}, {18, 8}, {146, 8}, {82, 8}, {210, 8}, + {50, 8}, {178, 8}, {114, 8}, {242, 8}, {10, 8}, {138, 8}, {74, 8}, + {202, 8}, {42, 8}, {170, 8}, {106, 8}, {234, 8}, {26, 8}, {154, 8}, + {90, 8}, {218, 8}, {58, 8}, {186, 8}, {122, 8}, {250, 8}, {6, 8}, + {134, 8}, {70, 8}, {198, 8}, {38, 8}, {166, 8}, {102, 8}, {230, 8}, + {22, 8}, {150, 8}, {86, 8}, {214, 8}, {54, 8}, {182, 8}, {118, 8}, + {246, 8}, {14, 8}, {142, 8}, {78, 8}, {206, 8}, {46, 8}, {174, 8}, + {110, 8}, {238, 8}, {30, 8}, {158, 8}, {94, 8}, {222, 8}, {62, 8}, + {190, 8}, {126, 8}, {254, 8}, {1, 8}, {129, 8}, {65, 8}, {193, 8}, + {33, 8}, {161, 8}, {97, 8}, {225, 8}, {17, 8}, {145, 8}, {81, 8}, + {209, 8}, {49, 8}, {177, 
8}, {113, 8}, {241, 8}, {9, 8}, {137, 8}, + {73, 8}, {201, 8}, {41, 8}, {169, 8}, {105, 8}, {233, 8}, {25, 8}, + {153, 8}, {89, 8}, {217, 8}, {57, 8}, {185, 8}, {121, 8}, {249, 8}, + {5, 8}, {133, 8}, {69, 8}, {197, 8}, {37, 8}, {165, 8}, {101, 8}, + {229, 8}, {21, 8}, {149, 8}, {85, 8}, {213, 8}, {53, 8}, {181, 8}, + {117, 8}, {245, 8}, {13, 8}, {141, 8}, {77, 8}, {205, 8}, {45, 8}, + {173, 8}, {109, 8}, {237, 8}, {29, 8}, {157, 8}, {93, 8}, {221, 8}, + {61, 8}, {189, 8}, {125, 8}, {253, 8}, {19, 9}, {275, 9}, {147, 9}, + {403, 9}, {83, 9}, {339, 9}, {211, 9}, {467, 9}, {51, 9}, {307, 9}, + {179, 9}, {435, 9}, {115, 9}, {371, 9}, {243, 9}, {499, 9}, {11, 9}, + {267, 9}, {139, 9}, {395, 9}, {75, 9}, {331, 9}, {203, 9}, {459, 9}, + {43, 9}, {299, 9}, {171, 9}, {427, 9}, {107, 9}, {363, 9}, {235, 9}, + {491, 9}, {27, 9}, {283, 9}, {155, 9}, {411, 9}, {91, 9}, {347, 9}, + {219, 9}, {475, 9}, {59, 9}, {315, 9}, {187, 9}, {443, 9}, {123, 9}, + {379, 9}, {251, 9}, {507, 9}, {7, 9}, {263, 9}, {135, 9}, {391, 9}, + {71, 9}, {327, 9}, {199, 9}, {455, 9}, {39, 9}, {295, 9}, {167, 9}, + {423, 9}, {103, 9}, {359, 9}, {231, 9}, {487, 9}, {23, 9}, {279, 9}, + {151, 9}, {407, 9}, {87, 9}, {343, 9}, {215, 9}, {471, 9}, {55, 9}, + {311, 9}, {183, 9}, {439, 9}, {119, 9}, {375, 9}, {247, 9}, {503, 9}, + {15, 9}, {271, 9}, {143, 9}, {399, 9}, {79, 9}, {335, 9}, {207, 9}, + {463, 9}, {47, 9}, {303, 9}, {175, 9}, {431, 9}, {111, 9}, {367, 9}, + {239, 9}, {495, 9}, {31, 9}, {287, 9}, {159, 9}, {415, 9}, {95, 9}, + {351, 9}, {223, 9}, {479, 9}, {63, 9}, {319, 9}, {191, 9}, {447, 9}, + {127, 9}, {383, 9}, {255, 9}, {511, 9}, {0, 7}, {64, 7}, {32, 7}, + {96, 7}, {16, 7}, {80, 7}, {48, 7}, {112, 7}, {8, 7}, {72, 7}, + {40, 7}, {104, 7}, {24, 7}, {88, 7}, {56, 7}, {120, 7}, {4, 7}, + {68, 7}, {36, 7}, {100, 7}, {20, 7}, {84, 7}, {52, 7}, {116, 7}, + {3, 8}, {131, 8}, {67, 8}, {195, 8}, {35, 8}, {163, 8}, {99, 8}, + {227, 8}, + }; + switch (len) { + case -3: + bits = 
static_ltree[kEndBlock].code; + break; + case -2: + bits = ch; + break; + case -1: + bits = 0; + break; + case 0: + bits = GetHuffLiteralBits(ch); + break; + default: + bits = GetHuffRunBits(len, dist); + break; + } + return bits; +} + +// assembles up to kVecX2 unsigned char values based on given huffman encoding +// writes up to kMaxHuffcodeBits * kVecX2 bits to memory +bool HufEnc(char *len, short *dist, unsigned char *data, unsigned int *outdata, + unsigned int *leftover, unsigned short *leftover_size) { + // array that contains the bit position of each symbol + unsigned short bitpos[kVec + 1]; + bitpos[0] = 0; + + Unroller<0, kVec>::step([&](int i) { + bitpos[i + 1] = bitpos[i] + (IsValid(len[i], dist[i], data[i]) + ? GetHuffLen(len[i], dist[i], data[i]) + : 0); + }); + + // leftover is an array that carries huffman encoded data not yet written to + // memory adjust leftover_size with the number of bits to write this time + unsigned short prev_cycle_offset = *leftover_size; + *leftover_size += (bitpos[kVec] & 0x3fff); + + // we'll write this cycle if we have collected enough data (kVec shorts or + // more) + bool write = *leftover_size & (kVec * (kMaxHuffcodeBits * 2)); + + // subtract kVec shorts from leftover size (if it's bigger + // than kVec) because we'll write those out this cycle + *leftover_size &= ~(kVec * (kMaxHuffcodeBits * 2)); + + // Adjust bitpos based on leftover offset from previous cycle + Unroller<0, kVec>::step( + [&](int i) { bitpos[i] += (prev_cycle_offset & 0x3fff); }); + + // Huffman codes have any bit alignement, so they can spill + // onto two shorts in the output array + // use ushort2 to keep each part of the code separate + // Iterate over all codes and construct ushort2 containing + // the code properly aligned + struct Uint2Gzip code[kVec]; + Unroller<0, kVec>::step([&](int i) { + code[i].x = 0; + code[i].y = 0; + }); + + Unroller<0, kVec>::step([&](int i) { + // Codes can be more than 16 bits, so use uint32 + unsigned int 
curr_code = GetHuffBits(len[i], dist[i], data[i]); + unsigned char bitpos_in_short = bitpos[i] & 0x01F; + + unsigned long long temp = (unsigned long long)curr_code << bitpos_in_short; + unsigned int temp1 = (unsigned int)temp; + unsigned int temp2 = temp >> 32ULL; + + if (IsValid(len[i], dist[i], data[i])) { + code[i].x = temp1; + code[i].y = temp2; + } else { + code[i].x = temp1; + code[i].y = temp2; + } + }); + + // Iterate over all destination locations and gather the required data + unsigned int new_leftover[kVec]; + Unroller<0, kVec>::step([&](int i) { + new_leftover[i] = 0; + outdata[i] = 0; + + Unroller<0, kVec>::step([&](int j) { + // figure out whether code[j] goes into bucket[i] + bool match_first = ((bitpos[j] >> 5) & (kVec - 1)) == i; + bool match_second = + ((bitpos[j] >> 5) & (kVec - 1)) == ((i - 1) & (kVec - 1)); + + // if code[j] maps onto current bucket then OR its code, else OR with 0 + unsigned int component = + match_first ? code[j].x : (match_second ? code[j].y : 0); + + // overflow from kVec shorts, need to move onto new_leftover + bool use_later = + (bitpos[j] & (kVec * (kMaxHuffcodeBits * 2))) || + (match_second && (((bitpos[j] >> 5) & (kVec - 1)) == kVec - 1)); + + // write to output + outdata[i] |= use_later ? 0 : component; + new_leftover[i] |= use_later ? component : 0; + }); + }); + + // Apply previous leftover on the outdata + // Also, if didn't write, apply prev leftover onto newleftover + Unroller<0, kVec>::step([&](int i) { + outdata[i] |= leftover[i]; + leftover[i] = outdata[i]; + }); + + // Split unroll into two unrolls to avoid compiler crash. This is a temporary + // workaround while awaiting a compiler feature. + if (write) { + Unroller<0, kVec>::step([&](int i) { leftover[i] = new_leftover[i]; }); + } + + return write; +} + +template +class CRC; +template +class LZReduction; +template +class StaticHuffman; +template +void SubmitGzipTasksSingleEngine( + queue &q, + size_t block_size, // size of block to compress. 
+ buffer *pibuf, buffer *pobuf, + buffer *gzip_out_buf, + buffer *result_crc, bool last_block, event &e_crc, event &e_lz, + event &e_huff) { + using acc_dist_channel = intel::pipe; + using acc_dist_channel_last = intel::pipe; + + e_crc = q.submit([&](handler &h) { + auto accessor_isz = block_size; + auto acc_pibuf = pibuf->get_access(h); + auto accresult_crc = result_crc->get_access(h); + h.single_task>([=]() [[intel::kernel_args_restrict]] { + const unsigned int table64[64][16] = { + { + 0x0, + 0xf1da05aa, + 0x38c50d15, + 0xc91f08bf, + 0x718a1a2a, + 0x80501f80, + 0x494f173f, + 0xb8951295, + 0xe3143454, + 0x12ce31fe, + 0xdbd13941, + 0x2a0b3ceb, + 0x929e2e7e, + 0x63442bd4, + 0xaa5b236b, + 0x5b8126c1, + }, + + { + 0x0, + 0x1d596ee9, + 0x3ab2ddd2, + 0x27ebb33b, + 0x7565bba4, + 0x683cd54d, + 0x4fd76676, + 0x528e089f, + 0xeacb7748, + 0xf79219a1, + 0xd079aa9a, + 0xcd20c473, + 0x9faeccec, + 0x82f7a205, + 0xa51c113e, + 0xb8457fd7, + }, + + { + 0x0, + 0xee7e8d1, + 0x1dcfd1a2, + 0x13283973, + 0x3b9fa344, + 0x35784b95, + 0x265072e6, + 0x28b79a37, + 0x773f4688, + 0x79d8ae59, + 0x6af0972a, + 0x64177ffb, + 0x4ca0e5cc, + 0x42470d1d, + 0x516f346e, + 0x5f88dcbf, + }, + + { + 0x0, + 0xee7e8d10, + 0x78c1c61, + 0xe9f29171, + 0xf1838c2, + 0xe166b5d2, + 0x89424a3, + 0xe6eaa9b3, + 0x1e307184, + 0xf04efc94, + 0x19bc6de5, + 0xf7c2e0f5, + 0x11284946, + 0xff56c456, + 0x16a45527, + 0xf8dad837, + }, + + { + 0x0, + 0x3c60e308, + 0x78c1c610, + 0x44a12518, + 0xf1838c20, + 0xcde36f28, + 0x89424a30, + 0xb522a938, + 0x38761e01, + 0x416fd09, + 0x40b7d811, + 0x7cd73b19, + 0xc9f59221, + 0xf5957129, + 0xb1345431, + 0x8d54b739, + }, + + { + 0x0, + 0x70ec3c02, + 0xe1d87804, + 0x91344406, + 0x18c1f649, + 0x682dca4b, + 0xf9198e4d, + 0x89f5b24f, + 0x3183ec92, + 0x416fd090, + 0xd05b9496, + 0xa0b7a894, + 0x29421adb, + 0x59ae26d9, + 0xc89a62df, + 0xb8765edd, + }, + + { + 0x0, + 0x6307d924, + 0xc60fb248, + 0xa5086b6c, + 0x576e62d1, + 0x3469bbf5, + 0x9161d099, + 0xf26609bd, + 0xaedcc5a2, + 0xcddb1c86, + 
0x68d377ea, + 0xbd4aece, + 0xf9b2a773, + 0x9ab57e57, + 0x3fbd153b, + 0x5cbacc1f, + }, + + { + 0x0, + 0x86c88d05, + 0xd6e01c4b, + 0x5028914e, + 0x76b13ed7, + 0xf079b3d2, + 0xa051229c, + 0x2699af99, + 0xed627dae, + 0x6baaf0ab, + 0x3b8261e5, + 0xbd4aece0, + 0x9bd34379, + 0x1d1bce7c, + 0x4d335f32, + 0xcbfbd237, + }, + + { + 0x0, + 0x1b5fd1d, + 0x36bfa3a, + 0x2de0727, + 0x6d7f474, + 0x7620969, + 0x5bc0e4e, + 0x409f353, + 0xdafe8e8, + 0xc1a15f5, + 0xec412d2, + 0xf71efcf, + 0xb781c9c, + 0xacde181, + 0x813e6a6, + 0x9a61bbb, + }, + + { + 0x0, + 0x1b5fd1d0, + 0x36bfa3a0, + 0x2de07270, + 0x6d7f4740, + 0x76209690, + 0x5bc0e4e0, + 0x409f3530, + 0xdafe8e80, + 0xc1a15f50, + 0xec412d20, + 0xf71efcf0, + 0xb781c9c0, + 0xacde1810, + 0x813e6a60, + 0x9a61bbb0, + }, + + { + 0x0, + 0x6e8c1b41, + 0xdd183682, + 0xb3942dc3, + 0x61416b45, + 0xfcd7004, + 0xbc595dc7, + 0xd2d54686, + 0xc282d68a, + 0xac0ecdcb, + 0x1f9ae008, + 0x7116fb49, + 0xa3c3bdcf, + 0xcd4fa68e, + 0x7edb8b4d, + 0x1057900c, + }, + + { + 0x0, + 0x5e74ab55, + 0xbce956aa, + 0xe29dfdff, + 0xa2a3ab15, + 0xfcd70040, + 0x1e4afdbf, + 0x403e56ea, + 0x9e36506b, + 0xc042fb3e, + 0x22df06c1, + 0x7cabad94, + 0x3c95fb7e, + 0x62e1502b, + 0x807cadd4, + 0xde080681, + }, + + { + 0x0, + 0xe71da697, + 0x154a4b6f, + 0xf257edf8, + 0x2a9496de, + 0xcd893049, + 0x3fdeddb1, + 0xd8c37b26, + 0x55292dbc, + 0xb2348b2b, + 0x406366d3, + 0xa77ec044, + 0x7fbdbb62, + 0x98a01df5, + 0x6af7f00d, + 0x8dea569a, + }, + + { + 0x0, + 0xaa525b78, + 0x8fd5b0b1, + 0x2587ebc9, + 0xc4da6723, + 0x6e883c5b, + 0x4b0fd792, + 0xe15d8cea, + 0x52c5c807, + 0xf897937f, + 0xdd1078b6, + 0x774223ce, + 0x961faf24, + 0x3c4df45c, + 0x19ca1f95, + 0xb39844ed, + }, + + { + 0x0, + 0xa58b900e, + 0x9066265d, + 0x35edb653, + 0xfbbd4afb, + 0x5e36daf5, + 0x6bdb6ca6, + 0xce50fca8, + 0x2c0b93b7, + 0x898003b9, + 0xbc6db5ea, + 0x19e625e4, + 0xd7b6d94c, + 0x723d4942, + 0x47d0ff11, + 0xe25b6f1f, + }, + + { + 0x0, + 0x5817276e, + 0xb02e4edc, + 0xe83969b2, + 0xbb2d9bf9, + 0xe33abc97, + 0xb03d525, + 
0x5314f24b, + 0xad2a31b3, + 0xf53d16dd, + 0x1d047f6f, + 0x45135801, + 0x1607aa4a, + 0x4e108d24, + 0xa629e496, + 0xfe3ec3f8, + }, + + { + 0x0, + 0x81256527, + 0xd93bcc0f, + 0x581ea928, + 0x69069e5f, + 0xe823fb78, + 0xb03d5250, + 0x31183777, + 0xd20d3cbe, + 0x53285999, + 0xb36f0b1, + 0x8a139596, + 0xbb0ba2e1, + 0x3a2ec7c6, + 0x62306eee, + 0xe3150bc9, + }, + + { + 0x0, + 0x7f6b7f3d, + 0xfed6fe7a, + 0x81bd8147, + 0x26dcfab5, + 0x59b78588, + 0xd80a04cf, + 0xa7617bf2, + 0x4db9f56a, + 0x32d28a57, + 0xb36f0b10, + 0xcc04742d, + 0x6b650fdf, + 0x140e70e2, + 0x95b3f1a5, + 0xead88e98, + }, + + { + 0x0, + 0x9b73ead4, + 0xed96d3e9, + 0x76e5393d, + 0x5ca193, + 0x9b2f4b47, + 0xedca727a, + 0x76b998ae, + 0xb94326, + 0x9bcaa9f2, + 0xed2f90cf, + 0x765c7a1b, + 0xe5e2b5, + 0x9b960861, + 0xed73315c, + 0x7600db88, + }, + + { + 0x0, + 0x172864c, + 0x2e50c98, + 0x3978ad4, + 0x5ca1930, + 0x4b89f7c, + 0x72f15a8, + 0x65d93e4, + 0xb943260, + 0xae6b42c, + 0x9713ef8, + 0x803b8b4, + 0xe5e2b50, + 0xf2cad1c, + 0xcbb27c8, + 0xdc9a184, + }, + + { + 0x0, + 0x172864c0, + 0x2e50c980, + 0x3978ad40, + 0x5ca19300, + 0x4b89f7c0, + 0x72f15a80, + 0x65d93e40, + 0xb9432600, + 0xae6b42c0, + 0x9713ef80, + 0x803b8b40, + 0xe5e2b500, + 0xf2cad1c0, + 0xcbb27c80, + 0xdc9a1840, + }, + + { + 0x0, + 0xa9f74a41, + 0x889f92c3, + 0x2168d882, + 0xca4e23c7, + 0x63b96986, + 0x42d1b104, + 0xeb26fb45, + 0x4fed41cf, + 0xe61a0b8e, + 0xc772d30c, + 0x6e85994d, + 0x85a36208, + 0x2c542849, + 0xd3cf0cb, + 0xa4cbba8a, + }, + + { + 0x0, + 0x9fda839e, + 0xe4c4017d, + 0x7b1e82e3, + 0x12f904bb, + 0x8d238725, + 0xf63d05c6, + 0x69e78658, + 0x25f20976, + 0xba288ae8, + 0xc136080b, + 0x5eec8b95, + 0x370b0dcd, + 0xa8d18e53, + 0xd3cf0cb0, + 0x4c158f2e, + }, + + { + 0x0, + 0x4be412ec, + 0x97c825d8, + 0xdc2c3734, + 0xf4e14df1, + 0xbf055f1d, + 0x63296829, + 0x28cd7ac5, + 0x32b39da3, + 0x79578f4f, + 0xa57bb87b, + 0xee9faa97, + 0xc652d052, + 0x8db6c2be, + 0x519af58a, + 0x1a7ee766, + }, + + { + 0x0, + 0x65673b46, + 0xcace768c, + 0xafa94dca, + 0x4eedeb59, 
+ 0x2b8ad01f, + 0x84239dd5, + 0xe144a693, + 0x9ddbd6b2, + 0xf8bcedf4, + 0x5715a03e, + 0x32729b78, + 0xd3363deb, + 0xb65106ad, + 0x19f84b67, + 0x7c9f7021, + }, + + { + 0x0, + 0xe0c6ab25, + 0x1afc500b, + 0xfa3afb2e, + 0x35f8a016, + 0xd53e0b33, + 0x2f04f01d, + 0xcfc25b38, + 0x6bf1402c, + 0x8b37eb09, + 0x710d1027, + 0x91cbbb02, + 0x5e09e03a, + 0xbecf4b1f, + 0x44f5b031, + 0xa4331b14, + }, + + { + 0x0, + 0xd7e28058, + 0x74b406f1, + 0xa35686a9, + 0xe9680de2, + 0x3e8a8dba, + 0x9ddc0b13, + 0x4a3e8b4b, + 0x9a11d85, + 0xde439ddd, + 0x7d151b74, + 0xaaf79b2c, + 0xe0c91067, + 0x372b903f, + 0x947d1696, + 0x439f96ce, + }, + + { + 0x0, + 0x13423b0a, + 0x26847614, + 0x35c64d1e, + 0x4d08ec28, + 0x5e4ad722, + 0x6b8c9a3c, + 0x78cea136, + 0x9a11d850, + 0x8953e35a, + 0xbc95ae44, + 0xafd7954e, + 0xd7193478, + 0xc45b0f72, + 0xf19d426c, + 0xe2df7966, + }, + + { + 0x0, + 0xef52b6e1, + 0x5d46b83, + 0xea86dd62, + 0xba8d706, + 0xe4fa61e7, + 0xe7cbc85, + 0xe12e0a64, + 0x1751ae0c, + 0xf80318ed, + 0x1285c58f, + 0xfdd7736e, + 0x1cf9790a, + 0xf3abcfeb, + 0x192d1289, + 0xf67fa468, + }, + + { + 0x0, + 0x2ea35c18, + 0x5d46b830, + 0x73e5e428, + 0xba8d7060, + 0x942e2c78, + 0xe7cbc850, + 0xc9689448, + 0xae6be681, + 0x80c8ba99, + 0xf32d5eb1, + 0xdd8e02a9, + 0x14e696e1, + 0x3a45caf9, + 0x49a02ed1, + 0x670372c9, + }, + + { + 0x0, + 0x87a6cb43, + 0xd43c90c7, + 0x539a5b84, + 0x730827cf, + 0xf4aeec8c, + 0xa734b708, + 0x20927c4b, + 0xe6104f9e, + 0x61b684dd, + 0x322cdf59, + 0xb58a141a, + 0x95186851, + 0x12bea312, + 0x4124f896, + 0xc68233d5, + }, + + { + 0x0, + 0x1751997d, + 0x2ea332fa, + 0x39f2ab87, + 0x5d4665f4, + 0x4a17fc89, + 0x73e5570e, + 0x64b4ce73, + 0xba8ccbe8, + 0xaddd5295, + 0x942ff912, + 0x837e606f, + 0xe7caae1c, + 0xf09b3761, + 0xc9699ce6, + 0xde38059b, + }, + + { + 0x0, + 0xae689191, + 0x87a02563, + 0x29c8b4f2, + 0xd4314c87, + 0x7a59dd16, + 0x539169e4, + 0xfdf9f875, + 0x73139f4f, + 0xdd7b0ede, + 0xf4b3ba2c, + 0x5adb2bbd, + 0xa722d3c8, + 0x94a4259, + 0x2082f6ab, + 0x8eea673a, + }, + + { + 0x0, + 
0xe6273e9e, + 0x173f7b7d, + 0xf11845e3, + 0x2e7ef6fa, + 0xc859c864, + 0x39418d87, + 0xdf66b319, + 0x5cfdedf4, + 0xbadad36a, + 0x4bc29689, + 0xade5a817, + 0x72831b0e, + 0x94a42590, + 0x65bc6073, + 0x839b5eed, + }, + + { + 0x0, + 0xb9fbdbe8, + 0xa886b191, + 0x117d6a79, + 0x8a7c6563, + 0x3387be8b, + 0x22fad4f2, + 0x9b010f1a, + 0xcf89cc87, + 0x7672176f, + 0x670f7d16, + 0xdef4a6fe, + 0x45f5a9e4, + 0xfc0e720c, + 0xed731875, + 0x5488c39d, + }, + + { + 0x0, + 0x44629f4f, + 0x88c53e9e, + 0xcca7a1d1, + 0xcafb7b7d, + 0x8e99e432, + 0x423e45e3, + 0x65cdaac, + 0x4e87f0bb, + 0xae56ff4, + 0xc642ce25, + 0x8220516a, + 0x847c8bc6, + 0xc01e1489, + 0xcb9b558, + 0x48db2a17, + }, + + { + 0x0, + 0x9d0fe176, + 0xe16ec4ad, + 0x7c6125db, + 0x19ac8f1b, + 0x84a36e6d, + 0xf8c24bb6, + 0x65cdaac0, + 0x33591e36, + 0xae56ff40, + 0xd237da9b, + 0x4f383bed, + 0x2af5912d, + 0xb7fa705b, + 0xcb9b5580, + 0x5694b4f6, + }, + + { + 0x0, + 0x66b23c6c, + 0xcd6478d8, + 0xabd644b4, + 0x41b9f7f1, + 0x270bcb9d, + 0x8cdd8f29, + 0xea6fb345, + 0x8373efe2, + 0xe5c1d38e, + 0x4e17973a, + 0x28a5ab56, + 0xc2ca1813, + 0xa478247f, + 0xfae60cb, + 0x691c5ca7, + }, + + { + 0x0, + 0xdd96d985, + 0x605cb54b, + 0xbdca6cce, + 0xc0b96a96, + 0x1d2fb313, + 0xa0e5dfdd, + 0x7d730658, + 0x5a03d36d, + 0x87950ae8, + 0x3a5f6626, + 0xe7c9bfa3, + 0x9abab9fb, + 0x472c607e, + 0xfae60cb0, + 0x2770d535, + }, + + { + 0x0, + 0xb407a6da, + 0xb37e4bf5, + 0x779ed2f, + 0xbd8d91ab, + 0x98a3771, + 0xef3da5e, + 0xbaf47c84, + 0xa06a2517, + 0x146d83cd, + 0x13146ee2, + 0xa713c838, + 0x1de7b4bc, + 0xa9e01266, + 0xae99ff49, + 0x1a9e5993, + }, + + { + 0x0, + 0x9ba54c6f, + 0xec3b9e9f, + 0x779ed2f0, + 0x3063b7f, + 0x98a37710, + 0xef3da5e0, + 0x7498e98f, + 0x60c76fe, + 0x9da93a91, + 0xea37e861, + 0x7192a40e, + 0x50a4d81, + 0x9eaf01ee, + 0xe931d31e, + 0x72949f71, + }, + + { + 0x0, + 0xc18edfc, + 0x1831dbf8, + 0x14293604, + 0x3063b7f0, + 0x3c7b5a0c, + 0x28526c08, + 0x244a81f4, + 0x60c76fe0, + 0x6cdf821c, + 0x78f6b418, + 0x74ee59e4, + 0x50a4d810, + 0x5cbc35ec, + 
0x489503e8, + 0x448dee14, + }, + + { + 0x0, + 0xc18edfc0, + 0x586cb9c1, + 0x99e26601, + 0xb0d97382, + 0x7157ac42, + 0xe8b5ca43, + 0x293b1583, + 0xbac3e145, + 0x7b4d3e85, + 0xe2af5884, + 0x23218744, + 0xa1a92c7, + 0xcb944d07, + 0x52762b06, + 0x93f8f4c6, + }, + + { + 0x0, + 0xaef6c4cb, + 0x869c8fd7, + 0x286a4b1c, + 0xd64819ef, + 0x78bedd24, + 0x50d49638, + 0xfe2252f3, + 0x77e1359f, + 0xd917f154, + 0xf17dba48, + 0x5f8b7e83, + 0xa1a92c70, + 0xf5fe8bb, + 0x2735a3a7, + 0x89c3676c, + }, + + { + 0x0, + 0xefc26b3e, + 0x4f5d03d, + 0xeb37bb03, + 0x9eba07a, + 0xe629cb44, + 0xd1e7047, + 0xe2dc1b79, + 0x13d740f4, + 0xfc152bca, + 0x172290c9, + 0xf8e0fbf7, + 0x1a3ce08e, + 0xf5fe8bb0, + 0x1ec930b3, + 0xf10b5b8d, + }, + + { + 0x0, + 0x27ae81e8, + 0x4f5d03d0, + 0x68f38238, + 0x9eba07a0, + 0xb9148648, + 0xd1e70470, + 0xf6498598, + 0xe6050901, + 0xc1ab88e9, + 0xa9580ad1, + 0x8ef68b39, + 0x78bf0ea1, + 0x5f118f49, + 0x37e20d71, + 0x104c8c99, + }, + + { + 0x0, + 0x177b1443, + 0x2ef62886, + 0x398d3cc5, + 0x5dec510c, + 0x4a97454f, + 0x731a798a, + 0x64616dc9, + 0xbbd8a218, + 0xaca3b65b, + 0x952e8a9e, + 0x82559edd, + 0xe634f314, + 0xf14fe757, + 0xc8c2db92, + 0xdfb9cfd1, + }, + + { + 0x0, + 0xacc04271, + 0x82f182a3, + 0x2e31c0d2, + 0xde920307, + 0x72524176, + 0x5c6381a4, + 0xf0a3c3d5, + 0x6655004f, + 0xca95423e, + 0xe4a482ec, + 0x4864c09d, + 0xb8c70348, + 0x14074139, + 0x3a3681eb, + 0x96f6c39a, + }, + + { + 0x0, + 0xccaa009e, + 0x4225077d, + 0x8e8f07e3, + 0x844a0efa, + 0x48e00e64, + 0xc66f0987, + 0xac50919, + 0xd3e51bb5, + 0x1f4f1b2b, + 0x91c01cc8, + 0x5d6a1c56, + 0x57af154f, + 0x9b0515d1, + 0x158a1232, + 0xd92012ac, + }, + + { + 0x0, + 0x7cbb312b, + 0xf9766256, + 0x85cd537d, + 0x299dc2ed, + 0x5526f3c6, + 0xd0eba0bb, + 0xac509190, + 0x533b85da, + 0x2f80b4f1, + 0xaa4de78c, + 0xd6f6d6a7, + 0x7aa64737, + 0x61d761c, + 0x83d02561, + 0xff6b144a, + }, + + { + 0x0, + 0xa6770bb4, + 0x979f1129, + 0x31e81a9d, + 0xf44f2413, + 0x52382fa7, + 0x63d0353a, + 0xc5a73e8e, + 0x33ef4e67, + 0x959845d3, + 
0xa4705f4e, + 0x20754fa, + 0xc7a06a74, + 0x61d761c0, + 0x503f7b5d, + 0xf64870e9, + }, + + { + 0x0, + 0x67de9cce, + 0xcfbd399c, + 0xa863a552, + 0x440b7579, + 0x23d5e9b7, + 0x8bb64ce5, + 0xec68d02b, + 0x8816eaf2, + 0xefc8763c, + 0x47abd36e, + 0x20754fa0, + 0xcc1d9f8b, + 0xabc30345, + 0x3a0a617, + 0x647e3ad9, + }, + + { + 0x0, + 0xcb5cd3a5, + 0x4dc8a10b, + 0x869472ae, + 0x9b914216, + 0x50cd91b3, + 0xd659e31d, + 0x1d0530b8, + 0xec53826d, + 0x270f51c8, + 0xa19b2366, + 0x6ac7f0c3, + 0x77c2c07b, + 0xbc9e13de, + 0x3a0a6170, + 0xf156b2d5, + }, + + { + 0x0, + 0x3d6029b, + 0x7ac0536, + 0x47a07ad, + 0xf580a6c, + 0xc8e08f7, + 0x8f40f5a, + 0xb220dc1, + 0x1eb014d8, + 0x1d661643, + 0x191c11ee, + 0x1aca1375, + 0x11e81eb4, + 0x123e1c2f, + 0x16441b82, + 0x15921919, + }, + + { + 0x0, + 0x3d6029b0, + 0x7ac05360, + 0x47a07ad0, + 0xf580a6c0, + 0xc8e08f70, + 0x8f40f5a0, + 0xb220dc10, + 0x30704bc1, + 0xd106271, + 0x4ab018a1, + 0x77d03111, + 0xc5f0ed01, + 0xf890c4b1, + 0xbf30be61, + 0x825097d1, + }, + + { + 0x0, + 0x60e09782, + 0xc1c12f04, + 0xa121b886, + 0x58f35849, + 0x3813cfcb, + 0x9932774d, + 0xf9d2e0cf, + 0xb1e6b092, + 0xd1062710, + 0x70279f96, + 0x10c70814, + 0xe915e8db, + 0x89f57f59, + 0x28d4c7df, + 0x4834505d, + }, + + { + 0x0, + 0xb8bc6765, + 0xaa09c88b, + 0x12b5afee, + 0x8f629757, + 0x37def032, + 0x256b5fdc, + 0x9dd738b9, + 0xc5b428ef, + 0x7d084f8a, + 0x6fbde064, + 0xd7018701, + 0x4ad6bfb8, + 0xf26ad8dd, + 0xe0df7733, + 0x58631056, + }, + + { + 0x0, + 0x5019579f, + 0xa032af3e, + 0xf02bf8a1, + 0x9b14583d, + 0xcb0d0fa2, + 0x3b26f703, + 0x6b3fa09c, + 0xed59b63b, + 0xbd40e1a4, + 0x4d6b1905, + 0x1d724e9a, + 0x764dee06, + 0x2654b999, + 0xd67f4138, + 0x866616a7, + }, + + { + 0x0, + 0x1c26a37, + 0x384d46e, + 0x246be59, + 0x709a8dc, + 0x6cbc2eb, + 0x48d7cb2, + 0x54f1685, + 0xe1351b8, + 0xfd13b8f, + 0xd9785d6, + 0xc55efe1, + 0x91af964, + 0x8d89353, + 0xa9e2d0a, + 0xb5c473d, + }, + + { + 0x0, + 0x1c26a370, + 0x384d46e0, + 0x246be590, + 0x709a8dc0, + 0x6cbc2eb0, + 0x48d7cb20, + 0x54f16850, + 
0xe1351b80, + 0xfd13b8f0, + 0xd9785d60, + 0xc55efe10, + 0x91af9640, + 0x8d893530, + 0xa9e2d0a0, + 0xb5c473d0, + }, + + { + 0x0, + 0x191b3141, + 0x32366282, + 0x2b2d53c3, + 0x646cc504, + 0x7d77f445, + 0x565aa786, + 0x4f4196c7, + 0xc8d98a08, + 0xd1c2bb49, + 0xfaefe88a, + 0xe3f4d9cb, + 0xacb54f0c, + 0xb5ae7e4d, + 0x9e832d8e, + 0x87981ccf, + }, + + { + 0x0, + 0x4ac21251, + 0x958424a2, + 0xdf4636f3, + 0xf0794f05, + 0xbabb5d54, + 0x65fd6ba7, + 0x2f3f79f6, + 0x3b83984b, + 0x71418a1a, + 0xae07bce9, + 0xe4c5aeb8, + 0xcbfad74e, + 0x8138c51f, + 0x5e7ef3ec, + 0x14bce1bd, + }, + + { + 0x0, + 0x77073096, + 0xee0e612c, + 0x990951ba, + 0x76dc419, + 0x706af48f, + 0xe963a535, + 0x9e6495a3, + 0xedb8832, + 0x79dcb8a4, + 0xe0d5e91e, + 0x97d2d988, + 0x9b64c2b, + 0x7eb17cbd, + 0xe7b82d07, + 0x90bf1d91, + }, + + { + 0x0, + 0x1db71064, + 0x3b6e20c8, + 0x26d930ac, + 0x76dc4190, + 0x6b6b51f4, + 0x4db26158, + 0x5005713c, + 0xedb88320, + 0xf00f9344, + 0xd6d6a3e8, + 0xcb61b38c, + 0x9b64c2b0, + 0x86d3d2d4, + 0xa00ae278, + 0xbdbdf21c, + }, + }; + + const int num_nibbles_parallel = 64; + + const int num_sections = accessor_isz / (num_nibbles_parallel / + 2); // how many loop iterations + unsigned int result = ~0; + + for (int i = 0; i < num_sections; i++) { + unsigned int result_update_odd = 0; + unsigned int result_update_even = 0; +// which 4 bit chunk within the section -- this loop can be unrolled, the +// total update for the crc is the xor of the updates from the nibbles + #pragma unroll + for (int nib = 0; nib < num_nibbles_parallel; nib++) { + unsigned char this_input_nibble = + (acc_pibuf[(i * num_nibbles_parallel + nib) / 2] >> + (4 * (nib % 2))); + unsigned char this_result_nibble = + (nib < 8) ? 
(result >> (4 * nib)) : 0; + unsigned char this_table_index = + this_input_nibble ^ this_result_nibble; + if (nib % 2) { + result_update_odd ^= table64[nib][this_table_index & 0xf]; + } else { + result_update_even ^= table64[nib][this_table_index & 0xf]; + } + } + result = result_update_odd ^ result_update_even; + } + + accresult_crc[0] = ~result; + }); + }); + + e_lz = q.submit([&](handler &h) { + auto accessor_isz = block_size; + auto acc_pibuf = pibuf->get_access(h); + + h.single_task>([=]() [[intel::kernel_args_restrict]] { + //------------------------------------- + // Hash Table(s) + //------------------------------------- + + [[intelfpga::singlepump]] [[intelfpga::numbanks(kVec)]] [ + [intelfpga::max_replicates(kVec)]] struct { + unsigned char s[kLen]; + } dictionary[kDepth][kVec]; + + [[intelfpga::singlepump]] [[intelfpga::numbanks(kVec)]] [ + [intelfpga::max_replicates( + kVec)]] unsigned int dict_offset[kDepth][kVec]; + + // Initialize history to empty. + for (int i = 0; i < kDepth; i++) { + Unroller<0, kVec>::step([&](int k) { dict_offset[i][k] = 0; }); + } + + // This is the window of data on which we look for matches + // We fetch twice our data size because we have kVec offsets + unsigned char current_window[kVecX2]; + + // This is the window of data on which we look for matches + // We fetch twice our data size because we have kVec offsets + unsigned char compare_window[kLen][kVec][kVec]; + // kVec bytes per dict----------| | | + // kVec dictionaries-----------------| | + // one for each curr win offset---------| + + // load offset into these arrays + unsigned int compare_offset[kVec][kVec]; + // one per kVec bytes----------| | + // one for each compwin-------------| + + // Initialize input stream position + unsigned int inpos_minus_vec_div_16 = 0; + + // this is ceiling of (insize-kVec)/16, original comparison was + // inpos < insize, now inpos is carried as (inpos-kVec)/16 so this is what + // we compare to + unsigned int insize_compare = 
(accessor_isz) / kVec; + + int ctr = insize_compare = insize_compare - 1; + + char first_valid_pos = 0; + + struct DistLen dist_offs_data; + + int distchan_ndx = 0; + size_t inpos = 0; + + // load in new data + struct LzInput in; + Unroller<0, kVec>::step([&](int i) { in.data[i] = acc_pibuf[inpos++]; }); + + Unroller<0, kVec>::step( + [&](int i) { current_window[i + kVec] = in.data[i]; }); + + do { + //----------------------------- + // Prepare current window + //----------------------------- + + // shift current window + Unroller<0, kVec>::step( + [&](int i) { current_window[i] = current_window[i + kVec]; }); + + // load in new data + Unroller<0, kVec>::step( + [&](int i) { in.data[i] = acc_pibuf[inpos++]; }); + + Unroller<0, kVec>::step( + [&](int i) { current_window[kVec + i] = in.data[i]; }); + + //----------------------------- + // Compute hash + //----------------------------- + + unsigned short hash[kVec]; + + Unroller<0, kVec>::step([&](int i) { + hash[i] = (current_window[i] ^ (current_window[i + 1] << 6) ^ + (current_window[i + 2] << 2) ^ current_window[i + 3]) & + kHashMask; + }); + + //----------------------------- + // Dictionary look-up + //----------------------------- + + // loop over kVec compare windows, each has a different hash + Unroller<0, kVec>::step([&](int i) { + // loop over all kVec bytes + Unroller<0, kLen>::step([&](int j) { + Unroller<0, kVec>::step([&](int k) { + compare_window[k][j][i] = dictionary[hash[i]][j].s[k]; + }); + }); + }); + + // loop over compare windows + Unroller<0, kVec>::step([&](int i) { + Unroller<0, kLen>::step([&](int j) { + // loop over frames in this compare window + // (they come from different dictionaries) + compare_offset[j][i] = dict_offset[hash[i]][j]; + }); + }); + + //----------------------------- + // Dictionary update + //----------------------------- + + // loop over different dictionaries to store different frames + // store one frame per dictionary + // loop over kVec bytes to store + Unroller<0, 
kLen>::step([&](int i) { + Unroller<0, kVec>::step([&](int j) { + // store actual bytes + dictionary[hash[i]][i].s[j] = current_window[i + j]; + }); + }); + + Unroller<0, kVec>::step([&](int i) { + // loop over kVec different dictionaries and write one word to each + dict_offset[hash[i]][i] = + (inpos_minus_vec_div_16 << 4) | + i; // inpos - kVec + 0, we know that inpos - kVec has 0 as the 4 + // lower bits so really just concatenate + }); + + //----------------------------- + // Match search + //----------------------------- + + // arrays to store length, best length etc.. + unsigned char length[kVec]; + bool done[kVec]; + char best_length[kVec]; + unsigned int best_offset[kVec]; + + // initialize best_length + Unroller<0, kVec>::step([&](int i) { + best_length[i] = 0; + best_offset[i] = 0; + }); + + // loop over each comparison window frame + // one comes from each dictionary + Unroller<0, kVec>::step([&](int i) { + // initialize length and done + Unroller<0, kVec>::step([&](int l) { + length[l] = 0; + done[l] = 0; + }); + + // loop over each current window + Unroller<0, kVec>::step([&](int j) { + // loop over each char in the current window + // and corresponding char in comparison window + Unroller<0, kLen>::step([&](int k) { + bool comp = + current_window[k + j] == compare_window[k][i][j] && !done[j]; + length[j] += comp; + done[j] = !comp; + }); + }); + + // Check if this the best length + Unroller<0, kVec>::step([&](int m) { + bool update_best = + (length[m] > best_length[m]) && (compare_offset[i][m] != 0) && + (((inpos_minus_vec_div_16 << kVecPow) | (i & (kVec - 1))) - + (compare_offset[i][m]) < + kMaxDistance); + + unsigned int new_offset = + (((inpos_minus_vec_div_16 << kVecPow) | (m & (kVec - 1))) & + 0x7ffff) - + ((compare_offset[i][m] & 0x7ffff)); + + // Reconsider if new_offset is bigger than current offset, might + // take more bytes to encode + update_best = update_best && (length[m] == best_length[m]) && + (new_offset > best_offset[m]) + ? 
false + : update_best; + + best_offset[m] = (update_best ? new_offset : best_offset[m]) & + 0x7ffff; // 19 bits is sufficient + + best_length[m] = (update_best ? length[m] : best_length[m]) & + 0x1f; // 5 bits is sufficient + }); + }); + + //----------------------------- + // Filter matches step 1 + //----------------------------- + + // remove matches with offsets that are <= 0: this means they're + // self-matching or didn't match and keep only the matches that, when + // encoded, take fewer bytes than the actual match length + Unroller<0, kVec>::step([&](int i) { + best_length[i] = (((best_length[i] & 0x1f) >= 3) && + ((best_offset[i]) < kMaxDistance) + ? best_length[i] + : 0) & + 0x1f; // 5 bits is sufficient + + // Second level filter - remove matches with len 3, greater than + // kTooFar + best_length[i] = + (((best_length[i] & 0x1f) == 3) && ((best_offset[i]) > kTooFar) + ? 0 + : best_length[i]) & + 0x1f; // 5 bits is sufficient + // don't emmit matches for last iteration as some of the + // second part of the window might be undefined + if (ctr == 0) best_length[i] = 0; + }); + + //----------------------------- + // Assign first_valid_pos + //----------------------------- + + // first_valid_pos is loop-carried, and tricky to compute. So first + // compute it speculatively in parallel for every possible value of the + // previous first_valid_pos. + char first_valid_pos_speculative[kVec]; + + Unroller<0, kVec>::step([&](int guess) { + unsigned char next_match_search = guess; + Unroller<0, kVec>::step([&](int i) { + unsigned int len = best_length[i]; + + // Skip to the next match + next_match_search = + i >= next_match_search && len > 0 ? i + len : next_match_search; + }); + + first_valid_pos_speculative[guess] = + next_match_search - kVec > 0 ? next_match_search - kVec : 0; + }); + + // For kVec=16 (the largest currently supported), this should be a 16:1 + // mux, which is 2 6LUTs deep. For larger kVec, it will be worse. 
+ unsigned char current_valid_pos = first_valid_pos; + first_valid_pos = + first_valid_pos_speculative[first_valid_pos & (kVec - 1)] & + (kVec - + 1); // first_valid_pos only needs 4 bits, make this explicit + + // greedy match selection + Unroller<0, (kVec)>::step([&](int i) { + unsigned int len = best_length[i]; + best_length[i] = i < current_valid_pos ? -1 : best_length[i]; + // Skip to the next match + current_valid_pos = + i >= current_valid_pos && len > 0 ? i + len : current_valid_pos; + }); + + //----------------------------- + // Setup LZ dist/len pairs to push to Huffman encode kernel + //----------------------------- + + Unroller<0, kVec>::step([&](int i) { + dist_offs_data.data[i] = 0; + dist_offs_data.len[i] = -1; + dist_offs_data.dist[i] = -1; + if (best_length[i] >= 0) { + dist_offs_data.data[i] = current_window[i]; + dist_offs_data.len[i] = best_length[i]; + dist_offs_data.dist[i] = best_offset[i]; + } + }); + + acc_dist_channel::write(dist_offs_data); + + // increment input position + inpos_minus_vec_div_16++; + distchan_ndx += 1; + ctr--; + + } while (ctr >= 0); + + const char lasti = accessor_isz - (accessor_isz & ~(kVec - 1)); + const char firstpos = first_valid_pos; + Unroller<0, kVec>::step([&](unsigned char i) { + dist_offs_data.data[i] = 0; + dist_offs_data.len[i] = -1; + dist_offs_data.dist[i] = -1; + }); + + Unroller<0, kVec>::step([&](unsigned char i) { + bool pred = + ((i - firstpos) < (lasti - firstpos)) && ((i - firstpos) >= 0); + dist_offs_data.data[i] = pred ? current_window[i + kVec] : 0; + dist_offs_data.len[i] = pred ? 0 : -1; + }); + + acc_dist_channel_last::write(dist_offs_data); + }); + }); + + e_huff = q.submit([&](handler &h) { + auto accessor_isz = block_size; + auto acc_gzip_out = + gzip_out_buf->get_access(h); + auto accessor_output = pobuf->get_access(h); + auto acc_eof = last_block ? 
1 : 0; + h.single_task>([= + ]() [[intel::kernel_args_restrict]] { + unsigned int leftover[kVec] = {0}; + Unroller<0, kVec>::step([&](int i) { leftover[i] = 0; }); + + unsigned short leftover_size = 0; + + unsigned int outpos_huffman = 0; + + int ctr = ((accessor_isz) / kVec) + 2; + int odx = 0; + + // Add the gzip start block marker. Assumes static huffman trees. + leftover_size = 3; + leftover[0] = ((kStaticTrees << 1) + (acc_eof)); + + do { + struct DistLen in; + // init the input structure for the gzip end block marker. + // this is the very last data block to be encoded and written. + Unroller<0, kVec>::step([&](int i) { + in.len[i] = -1; + in.dist[i] = -1; + in.data[i] = 0; + }); + in.len[0] = ctr == 1 ? -3 : -1; + in.data[0] = 0; + + in = ctr > 2 ? acc_dist_channel::read() + : (ctr == 2 ? acc_dist_channel_last::read() : in); + + struct HuffmanOutput outdata; + outdata.write = HufEnc(in.len, in.dist, in.data, outdata.data, leftover, + &leftover_size); + + // prevent out of bounds write + if (((ctr == 0) || outdata.write) && (odx < accessor_isz)) { + Unroller<0, kVec * sizeof(unsigned int)>::step([&](int i) { + accessor_output[odx + i] = + (ctr == 0) ? (unsigned char)(leftover[(i >> 2) & 0xf] >> + ((i & 3) << 3)) + : (unsigned char)(outdata.data[(i >> 2) & 0xf] >> + ((i & 3) << 3)); + }); + } + + outpos_huffman = outdata.write ? outpos_huffman + 1 : outpos_huffman; + odx += outdata.write ? (sizeof(unsigned int) << kVecPow) : 0; + + } while (ctr--); + + // Store summary values from lz and huffman + acc_gzip_out[0].compression_sz = + (outpos_huffman * sizeof(unsigned int) * kVec) + + (leftover_size + 7) / 8; + }); + }); +} + +void SubmitGzipTasks(queue &q, + size_t block_size, // size of block to compress. + buffer *pibuf, buffer *pobuf, + buffer *gzip_out_buf, + buffer *result_crc, bool last_block, + event &e_crc, event &e_lz, event &e_huff, + size_t engineID) { + // Statically declare the engines so that the hardware is created for them. 
+ // But at run time, the host can dynamically select which engine(s) to use via + // engineID. + if (engineID == 0) { + SubmitGzipTasksSingleEngine<0>(q, block_size, pibuf, pobuf, gzip_out_buf, + result_crc, last_block, e_crc, e_lz, e_huff); + } + + #if NUM_ENGINES > 1 + if (engineID == 1) { + SubmitGzipTasksSingleEngine<1>(q, block_size, pibuf, pobuf, gzip_out_buf, + result_crc, last_block, e_crc, e_lz, e_huff); + } + #endif + + // If this reference design is to be expanded to > 2 engines, declare them here. + +} diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/gzipkernel.hpp b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/gzipkernel.hpp new file mode 100755 index 0000000000..7de9a3ea17 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/gzipkernel.hpp @@ -0,0 +1,45 @@ +// ============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// +// This agreement shall be governed in all respects by the laws of the State of +// California and by the laws of the United States of America. + +#ifndef __GZIPKERNEL_H__ +#define __GZIPKERNEL_H__ +#pragma once + +#include + +using namespace cl::sycl; + +extern "C" void SubmitGzipTasks( + queue &sycl_device, + size_t block_size, // size of block to compress. + buffer *pibuf, buffer *pobuf, + buffer *gzip_out_buf, + buffer *current_crc, bool last_block, event &e_crc, + event &e_lz, event &e_huff, size_t engineID); + +#endif //__GZIPKERNEL_H__ diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/kernels.hpp b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/kernels.hpp new file mode 100755 index 0000000000..65f207bab7 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/gzip/src/kernels.hpp @@ -0,0 +1,148 @@ +// ============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// +// This agreement shall be governed in all respects by the laws of the State of +// California and by the laws of the United States of America. + +#ifndef __KERNELS_H__ +#define __KERNELS_H__ +#pragma once + +#ifndef NUM_ENGINES + #define NUM_ENGINES 1 +#endif + +constexpr int kNumEngines = NUM_ENGINES; + +// kVecPow == 2 means kVec == 4. +// kVecPow == 3 means kVec == 8. +// kVecPow == 4 means kVec == 16. +constexpr int kVecPow = 4; + +constexpr int kVec = 1 << kVecPow; +constexpr int kVecX2 = 2 * kVec; + +constexpr int kHufTableSize = 256; + +// Maximum length of huffman codes +constexpr int kMaxHuffcodeBits = 16; + +struct Uint2Gzip { + unsigned int y; + unsigned int x; +}; + +struct LzInput { + unsigned char data[kVec]; +}; + +typedef struct DistLen { + unsigned char data[kVec]; + char len[kVec]; + short dist[kVec]; +} DistLen, *pdist_len_t; + +struct HuffmanOutput { + unsigned int data[kVec]; + bool write; +}; + +struct TrailingOutput { + int bytecount_left; + int bytecount; + unsigned char bytes[kVec * sizeof(unsigned int)]; +}; + +struct GzipOutInfo { + // final compressed block size + size_t compression_sz; + unsigned long crc; +}; + +// kLen must be == kVec +constexpr int kLen = kVec; + +// depth of the dictionary buffers +constexpr int kDepth = 512; + +// Assumes kDepth is a power of 2 number. 
+constexpr int kHashMask = kDepth - 1; + +#define CONSTANT __constant + +constexpr int kDebug = 1; +#define TRACE(x) \ + do { \ + if (kDebug) printf x; \ + } while (0) + +constexpr int kStaticTrees = 1; + +typedef struct CtData { + unsigned short code; + unsigned short len; +} CtData; + +constexpr int kMaxMatch = 258; +constexpr int kMinMatch = 3; + +constexpr int kTooFar = 4096; + +// All codes must not exceed kMaxBits +constexpr int kMaxBits = 15; + +// number of length codes, not counting the special kEndBlock code +constexpr int kLengthCodes = 29; + +// number of literal bytes, 0..255 +constexpr int kLiterals = 256; + +// end of literal code block +constexpr int kEndBlock = 256; + +// number of literal or length codes, including kEndBlock +constexpr int kLCodes = (kLiterals + 1 + kLengthCodes); + +// number of distance codes +constexpr int kDCodes = 30; + +// number of codes used to transfer the bit lengths +constexpr int kBLCodes = 19; + +constexpr int kMaxDistance = ((32 * 1024)); + +constexpr int kMinBufferSize = 16384; + +struct DictString { + unsigned char s[kLen]; +}; + +// Mapping from a distance to a distance code. dist is the distance - 1 and +// must not have side effects. dist_code[256] and dist_code[257] are never +// used. +#define d_code(dist) \ + ((dist) < 256 ? 
dist_code[dist] : dist_code[256 + ((dist) >> 7)]) + +#endif //__KERNELS_H__ diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/CMakeLists.txt b/DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/CMakeLists.txt new file mode 100755 index 0000000000..81cd1c747a --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/CMakeLists.txt @@ -0,0 +1,12 @@ +set(CMAKE_CXX_COMPILER "dpcpp") + + +cmake_minimum_required (VERSION 2.8) + +project(QRD) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +add_subdirectory (src) diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/License.txt b/DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/License.txt new file mode 100755 index 0000000000..e63c6e13dc --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/License.txt @@ -0,0 +1,7 @@ +Copyright Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/README.md b/DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/README.md new file mode 100755 index 0000000000..34288260cf --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/README.md @@ -0,0 +1,239 @@ +# QR Decomposition of Matrices +This DPC++ reference design demonstrates high-performance QR decomposition of complex matrices on FPGA. + +***Documentation***: The [FPGA Optimization Guide](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide) provides comprehensive instructions for targeting FPGAs through DPC++. The [oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide) is a resource for general target-independent DPC++ programming. + +| Optimized for | Description +--- |--- +| OS | Linux* Ubuntu* 18.04; Windows* 10 +| Hardware | Intel® Programmable Acceleration Card (PAC) with Intel Arria® 10 GX FPGA;
Intel® PAC with Intel Stratix® 10 SX FPGA;
Intel Xeon® CPU E5-1650 v2 @ 3.50GHz (host machine) +| Software | Intel® oneAPI DPC++ Compiler (Beta)
Intel® FPGA Add-On for oneAPI Base Toolkit +| What you will learn | Implementing a high performance FPGA version of the Gram-Schmidt QR decomposition algorithm. +| Time to complete | 1 hr (not including compile time) + +_Notice: Limited support in Windows*; compiling for FPGA hardware is not supported in Windows*_ + + +**Performance** +Please refer to performance disclaimer at the end of this README. + +| Device | Throughput +|:--- |:--- +| Intel® PAC with Intel Arria® 10 GX FPGA | 25k matrices/s for matrices of size 128 * 128 +| Intel® PAC with Intel Stratix® 10 SX FPGA | 7k matrices/s for matrices of size 256 * 256 + + +## Purpose + +This FPGA reference design demonstrates QR decomposition of matrices of complex numbers, a common operation employed in linear algebra. Matrix _A_ (input) is decomposed into a product of an orthogonal matrix _Q_ and an upper triangular matrix _R_. + +The algorithms employed by the reference design are the Gram-Schmidt QR decomposition algorithm and the thin QR factorization method. Background information on these algorithms can be found in Wikipedia's [QR decomposition](https://en.wikipedia.org/wiki/QR_decomposition) article. The original algorithm has been modified and optimized for performance on FPGAs in this implementation. + +QR decomposition is used extensively in signal processing applications such as beamforming, multiple-input multiple-output (MIMO) processing, and Space Time Adaptive Processing (STAP). + + +### Matrix dimensions and FPGA resources + +The QR decomposition algorithm factors a complex _m_×_n_ matrix, where _m_ ≥ _n_. The algorithm computes the vector dot product of two columns of the matrix. In our FPGA implementation, the dot product is computed in a loop over the _m_ elements of the column. The loop is fully unrolled to maximize throughput. As a result, *m* complex multiplication operations are performed in parallel on the FPGA, followed by sequential additions to compute the dot product result. 
+ +We use the compiler flag `-Xsfp-relaxed`, which permits the compiler to reorder floating point additions (i.e. to assume that floating point addition is associative). The compiler uses this freedom to reorder the additions so that the dot product arithmetic can be optimally implemented using the FPGA's specialized floating point DSP (Digital Signal Processing) hardware. + +With this optimization, our FPGA implementation requires 4*m* DSPs to compute the complex floating point dot product. Thus, the matrix size is constrained by the total FPGA DSP resources available. Note that this upper bound is a consequence of this particular implementation. + +By default, the design is parameterized to process 128 × 128 matrices when compiled targeting Intel® PAC with Intel Arria® 10 GX FPGA. It is parameterized to process 256 × 256 matrices when compiled targeting Intel® PAC with Intel Stratix® 10 SX FPGA, a larger device. + + +## Key Implementation Details +| Kernel | Description +--- |--- +| QRD | Implements a modified Gram-Schmidt QR decomposition algorithm. + +To optimize the performance-critical loop in its algorithm, the design leverages concepts discussed in the following FPGA tutorials: +* **Triangular Loop Optimization** (triangular_loop) +* **Explicit Pipelining with `fpga_reg`** (fpga_register) +* **Loop `ivdep` Attribute** (loop_ivdep) +* **Unrolling Loops** (loop_unroll) + + The key optimization techniques used are as follows: + 1. Refactoring the algorithm to merge two dot products into one, reducing the total number of dot products needed from three to two. This helps us reduce the DSPs needed for the implementation. + 2. Converting the nested loop into a single merged loop and applying Triangular Loop optimizations. This allows us to generate a design that is very well pipelined. + 3. Fully vectorizing the dot products using loop unrolling. + 4. 
Using the compiler flag -Xsfp-relaxed to re-order floating point operations and allowing the inference of a specialised dot-product DSP. This further reduces the number of DSP blocks needed by the implementation, the overall latency, and pipeline depth. + 5. Using an efficient memory banking scheme to generate high performance hardware. + 6. Using the `fpga_reg` attribute to insert more pipeline stages where needed to improve the frequency achieved by the design. + +## License +This code sample is licensed under MIT license. + +## Building the Reference Design + +### Include Files +The include folder is located at `%ONEAPI_ROOT%\dev-utilities\latest\include` on your development system. + +### Running Code Samples in DevCloud +If running a sample in the Intel DevCloud, remember that you must specify the compute node (fpga_compile or fpga_runtime) as well as whether to run in batch or interactive mode. For more information see the Intel® oneAPI Base Toolkit Get Started Guide ([https://devcloud.intel.com/oneapi/get-started/base-toolkit/](https://devcloud.intel.com/oneapi/get-started/base-toolkit/)). + +When compiling for FPGA hardware, it is recommended to increase the job timeout to 24h. + +### On a Linux* System +1. Install the design into a directory `build` from the design directory by running `cmake`: + + ``` + mkdir build + cd build + ``` + + If you are compiling for the Intel® PAC with Intel Arria® 10 GX FPGA, run `cmake` using the command: + + ``` + cmake .. + ``` + + If instead you are compiling for the Intel® PAC with Intel Stratix® 10 SX FPGA, run `cmake` using the command: + + ``` + cmake .. -DFPGA_BOARD=intel_s10sx_pac:pac_s10 + ``` + +2. Compile the design through the generated `Makefile`. The following targets are provided and they match the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device). + + ``` + make fpga_emu + ``` + + * Generate HTML performance report. 
Find the report in `qrd_report.prj/reports/report.html`. + + ``` + make report + ``` + + * Compile for FPGA hardware (longer compile time, targets FPGA device). + + ``` + make fpga + ``` + +3. (Optional) As the above hardware compile may take several hours to complete, an Intel® PAC with Intel Arria® 10 GX FPGA precompiled binary can be downloaded here. + +### On a Windows* System +Note: `cmake` is not yet supported on Windows. A build.ninja file is provided instead. + +Note: Ensure that Microsoft Visual Studio* (2017, or 2019 Version 16.4 or newer) with "Desktop development with C++" workload is installed on your system. + +1. Enter source file directory. + +``` +cd src +``` + +2. Compile the design. The following targets are provided and they match the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device). + + ``` + ninja fpga_emu + ``` + + * Generate HTML performance report. Find the report in `../src/qrd_report.prj/reports/report.html`. + + ``` + ninja report + ``` + + If you are targeting the Intel® PAC with Intel Stratix® 10 SX FPGA, please use the following target and find the report in `../src/qrd_s10_pac_report.prj/reports/report.html`. + + ``` + ninja report_s10_pac + ``` + + * **Not supported yet:** Compile and run on FPGA hardware. + +### In Third-Party Integrated Development Environments (IDEs) + +You can compile and run this Reference Design in the Eclipse* IDE (in Linux*) and the Visual Studio* IDE (in Windows*). For instructions, refer to the following link: [Intel® oneAPI DPC++ FPGA Workflows on Third-Party IDEs](https://software.intel.com/en-us/articles/intel-oneapi-dpcpp-fpga-workflow-on-ide) + +## Running the Reference Design +You can apply QR decomposition to a number of matrices as shown below. This step performs the following: +* Generates the number of random matrices specified as the command line argument (defaults to 1). +* Computes QR decomposition on all matrices. 
+* Evaluates performance. +NOTE: The design is optimized to perform best when run on a large number of matrices, where the total number of matrices is a power of 2. + + + + 1. Run the sample on the FPGA emulator (the kernel executes on the CPU). + ``` + ./qrd.fpga_emu (Linux) + qrd.fpga_emu.exe (Windows) + ``` + +2. Run the sample on the FPGA device. It is recommended to pass in an optional argument (as shown) when invoking the sample on hardware. Otherwise, the performance will not be representative. + ``` + ./qrd.fpga 40960 (Linux) + ``` +### Application Parameters + +| Argument | Description +--- |--- +| `<num_matrices>` | Optional argument that specifies the number of matrices to decompose. Its default value is `1`. + +### Example of Output + +Example output when running on Intel® PAC with Intel Arria® 10 GX FPGA for 32768 matrices (each consisting of 128*128 complex numbers): + +``` +Device name: pac_a10 : Intel PAC Platform (pac_f000000) +Generating 32768 random matrices +Running QR decomposition of 32768 matrices repeatedly + Total duration: 41.3763 s +Throughput: 25.3425k matrices/s +Verifying results on matrix 0 16384 32767 +PASSED +``` + +Example output when running on Intel® PAC with Intel Stratix® 10 SX FPGA for 4096 matrices (each consisting of 256*256 complex numbers): + +``` +Device name: pac_s10 : Intel PAC Platform (pac_f100000) +Generating 4096 random matrices +Running QR decomposition of 4096 matrices repeatedly + Total duration: 17.3197 s +Throughput: 7.5678k matrices/s +Verifying results on matrix 0 2048 4095 +PASSED +``` + +## Additional Design Information + +### Compiler Flags Used + +| Flag | Description +--- |--- +`-Xshardware` | Target FPGA hardware (as opposed to FPGA emulator) +`-Xsclock=330MHz` | The FPGA backend attempts to achieve 330 MHz +`-Xsfp-relaxed` | Allows the FPGA backend to re-order floating point arithmetic operations (e.g.
permit assuming (a + b + c) == (c + a + b) ) +`-Xsparallel=2` | Use 2 cores when compiling the bitstream through Quartus +`-Xsseed` | Specifies the Quartus compile seed, to yield slightly higher fmax +`-DROWS_COMPONENT` | Specifies the number of rows of the matrix +`-DCOLS_COMPONENT` | Specifies the number of columns of the matrix +`-DFIXED_ITERATIONS` | Used to set the ivdep safelen attribute for the performance critical triangular loop + +NOTE: The values for `seed`, `FIXED_ITERATIONS`, `ROWS_COMPONENT`, `COLS_COMPONENT` are set according to the board being targeted. + +### Performance disclaimers + +Tests document performance of components on a particular test, in specific systems. Differences in hardware, software, or configuration will affect actual performance. Consult other sources of information to evaluate performance as you consider your purchase. For more complete information about performance and benchmark results, visit [www.intel.com/benchmarks](https://www.intel.com/benchmarks). + +Performance results are based on testing as of July 29, 2020 and may not reflect all publicly available security updates. See configuration disclosure for details. No product or component can be absolutely secure. + +Intel technologies’ features and benefits depend on system configuration and may require enabled hardware, software or service activation. Performance varies depending on system configuration. Check with your system manufacturer or retailer or learn more at [intel.com](https://www.intel.com). + +The performance was measured by Intel on July 29, 2020. + +Intel and the Intel logo are trademarks of Intel Corporation or its subsidiaries in the U.S. and/or other countries. + +(C) Intel Corporation. 
+ + diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/qrd.sln b/DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/qrd.sln new file mode 100755 index 0000000000..b5e086d1f5 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/qrd.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.28307.705 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "qrd", "qrd.vcxproj", "{ACDE6B7A-6F9A-428E-B040-CEDC5B1E2C79}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {ACDE6B7A-6F9A-428E-B040-CEDC5B1E2C79}.Debug|x64.ActiveCfg = Debug|x64 + {ACDE6B7A-6F9A-428E-B040-CEDC5B1E2C79}.Debug|x64.Build.0 = Debug|x64 + {ACDE6B7A-6F9A-428E-B040-CEDC5B1E2C79}.Release|x64.ActiveCfg = Release|x64 + {ACDE6B7A-6F9A-428E-B040-CEDC5B1E2C79}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {97D1BD74-AAAB-4835-8F00-37A58B70871A} + EndGlobalSection +EndGlobal diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/qrd.vcxproj b/DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/qrd.vcxproj new file mode 100755 index 0000000000..95a7067c03 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/qrd.vcxproj @@ -0,0 +1,170 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + + + + + + + + + + 15.0 + {acde6b7a-6f9a-428e-b040-cedc5b1e2c79} + Win32Proj + qrd + $(WindowsSDKVersion.Replace("\","")) + + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + 
+ Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + + + + + + + + + + + + + + + + + + + true + + + true + + + false + + + false + + + + Use + Level3 + Disabled + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + Disabled + true + true + pch.h + true + -DFPGA_EMULATOR -DFIXED_ITERATIONS=64 -DROWS_COMPONENT=128 -DCOLS_COMPONENT=128 %(AdditionalOptions) + + + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + -Xsclock=330MHz;-Xsfp-relaxed;-Xsparallel=2 + + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + true + -DFPGA_EMULATOR -DFIXED_ITERATIONS=64 -DROWS_COMPONENT=128 -DCOLS_COMPONENT=128 %(AdditionalOptions) + + + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + -Xsclock=330MHz;-Xsfp-relaxed;-Xsparallel=2 + + + + + + + diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/sample.json b/DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/sample.json new file mode 100755 index 0000000000..aa107a266e --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/sample.json @@ -0,0 +1,57 @@ +{ + "guid": "3228581F-9DF8-4696-9B1C-0B31286B97C3", + "name": "QR Decomposition of Matrices on FPGA", + "categories": ["Toolkit/Intel® oneAPI Base Toolkit/FPGA/Reference Designs"], + "description": "Reference design demonstrating high-performance QR decomposition of complex matrices on FPGA", + "toolchain": ["dpcpp"], + "os": ["linux", "windows"], + "builder": ["ide", "cmake"], + "targetDevice": ["FPGA"], + "languages": [{"cpp":{}}], + "ciTests": { + "linux": [ + { + "id": "fpga_emu", + "env": [ + "export CL_CONFIG_CPU_FORCE_PRIVATE_MEM_SIZE=32MB" + ], + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make fpga_emu", + "./qrd.fpga_emu" + ] + }, + { + "id": 
"report", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make report" + ] + } + ], + "windows": [ + { + "id": "fpga_emu", + "env": [ + "set CL_CONFIG_CPU_FORCE_PRIVATE_MEM_SIZE=32MB" + ], + "steps": [ + "cd src", + "ninja fpga_emu", + "qrd.fpga_emu.exe" + ] + }, + { + "id": "report", + "steps": [ + "cd src", + "ninja report" + ] + } + ] + } +} diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/src/CMakeLists.txt b/DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/src/CMakeLists.txt new file mode 100755 index 0000000000..5003e6a357 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/src/CMakeLists.txt @@ -0,0 +1,129 @@ +set(DEVICE_SOURCE_FILE qrd.cpp) +set(DEVICE_HEADER_FILE qrd.hpp) +set(HOST_SOURCE_FILE qrd_demo.cpp) +set(TARGET_NAME qrd) + +set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu) +set(FPGA_TARGET ${TARGET_NAME}.fpga) +set(REPORTS_TARGET ${TARGET_NAME}_report) + +# Intel supported FPGA Boards and their names +set(A10_PAC_BOARD_NAME "intel_a10gx_pac:pac_a10") +set(S10_PAC_BOARD_NAME "intel_s10sx_pac:pac_s10") + +# Design specific constant values +set(ROWS_COMPONENT_A10 128) +set(COLS_COMPONENT_A10 128) + +set(ROWS_COMPONENT_S10 256) +set(COLS_COMPONENT_S10 256) + +set(FIXED_ITERATIONS_A10 64) +set(FIXED_ITERATIONS_S10 105) + +set(SEED_A10 5) +set(SEED_S10 1) + +# Set parameter values assuming target is Intel(R) PAC with Intel Arria(R) 10 GX FPGA +SET(_FPGA_BOARD ${A10_PAC_BOARD_NAME}) +SET(FIXED_ITERATIONS ${FIXED_ITERATIONS_A10}) +SET(SEED ${SEED_A10}) +SET(ROWS_COMPONENT ${ROWS_COMPONENT_A10}) +SET(COLS_COMPONENT ${COLS_COMPONENT_A10}) + +# Check if target is the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA +IF (NOT DEFINED FPGA_BOARD) + MESSAGE(STATUS "\tFPGA_BOARD was not specified. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. 
Please refer to the README for more information on how to run the design on the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${A10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${S10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Stratix(R) 10 SX FPGA.") + SET(_FPGA_BOARD ${S10_PAC_BOARD_NAME}) + SET(FIXED_ITERATIONS ${FIXED_ITERATIONS_S10}) + SET(SEED ${SEED_S10}) + SET(ROWS_COMPONENT ${ROWS_COMPONENT_S10}) + SET(COLS_COMPONENT ${COLS_COMPONENT_S10}) + +ELSE() + MESSAGE(STATUS "\tAn invalid board name was passed in using the FPGA_BOARD flag. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. Please refer to the README for the list of valid board names.") +ENDIF() + +set(HARDWARE_COMPILE_FLAGS -fintelfpga -c -DFIXED_ITERATIONS=${FIXED_ITERATIONS} -DROWS_COMPONENT=${ROWS_COMPONENT} -DCOLS_COMPONENT=${COLS_COMPONENT}) + +# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation +separate_arguments(USER_HARDWARE_FLAGS) +set(HARDWARE_LINK_FLAGS -fintelfpga -Xshardware -Xsclock=330MHz -Xsfp-relaxed -Xsparallel=2 -Xsseed=${SEED} -Xsboard=${_FPGA_BOARD} ${USER_HARDWARE_FLAGS} -DFIXED_ITERATIONS=${FIXED_ITERATIONS} -DROWS_COMPONENT=${ROWS_COMPONENT} -DCOLS_COMPONENT=${COLS_COMPONENT}) +set(FINAL_LINK_FLAGS -fintelfpga -DFIXED_ITERATIONS=${FIXED_ITERATIONS} -DROWS_COMPONENT=${ROWS_COMPONENT} -DCOLS_COMPONENT=${COLS_COMPONENT}) + +set(EMULATOR_COMPILE_FLAGS "-fintelfpga -DFPGA_EMULATOR -DFIXED_ITERATIONS=${FIXED_ITERATIONS} -DROWS_COMPONENT=${ROWS_COMPONENT} -DCOLS_COMPONENT=${COLS_COMPONENT}") +set(EMULATOR_LINK_FLAGS -fintelfpga ) + +# fpga emulator +if(WIN32) + set(WIN_EMULATOR_TARGET ${EMULATOR_TARGET}.exe) + 
add_custom_target(fpga_emu DEPENDS ${WIN_EMULATOR_TARGET}) + separate_arguments(WIN_EMULATOR_COMPILE_FLAGS WINDOWS_COMMAND "${EMULATOR_COMPILE_FLAGS}") + add_custom_command(OUTPUT ${WIN_EMULATOR_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} ${WIN_EMULATOR_COMPILE_FLAGS} /GX ${CMAKE_CURRENT_SOURCE_DIR}/${DEVICE_SOURCE_FILE} ${CMAKE_CURRENT_SOURCE_DIR}/${HOST_SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${WIN_EMULATOR_TARGET} + DEPENDS ${DEVICE_SOURCE_FILE} ${HOST_SOURCE_FILE}) +else() + add_executable(${EMULATOR_TARGET} ${DEVICE_SOURCE_FILE} ${HOST_SOURCE_FILE}) + add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS ${EMULATOR_COMPILE_FLAGS}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS ${EMULATOR_LINK_FLAGS}) +endif() + +# fpga +if(WIN32) + add_custom_target(fpga + COMMAND echo "FPGA hardware flow is not supported in Windows") +else() + add_custom_target(fpga DEPENDS ${FPGA_TARGET}) + set(DEVICE_FPGA_OBJ "qrd_fpga.o") + set(DEVICE_IMAGE_FPGA_OBJ "qrd_fpga.a") + set(HOST_FPGA_OBJ "qrd_host.o") + + add_custom_command(OUTPUT ${DEVICE_FPGA_OBJ} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${HARDWARE_COMPILE_FLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/${DEVICE_SOURCE_FILE} -o ${DEVICE_FPGA_OBJ} + DEPENDS ${DEVICE_SOURCE_FILE} ${DEVICE_HEADER_FILE}) + + add_custom_command(OUTPUT ${HOST_FPGA_OBJ} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${HARDWARE_COMPILE_FLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/${HOST_SOURCE_FILE} -o ${HOST_FPGA_OBJ} + DEPENDS ${HOST_SOURCE_FILE}) + + add_custom_command(OUTPUT ${DEVICE_IMAGE_FPGA_OBJ} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS} -fsycl-link=image ${DEVICE_FPGA_OBJ} -o ${DEVICE_IMAGE_FPGA_OBJ} + DEPENDS ${DEVICE_FPGA_OBJ}) + + add_custom_command(OUTPUT ${FPGA_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${FINAL_LINK_FLAGS} ${HOST_FPGA_OBJ} ${DEVICE_IMAGE_FPGA_OBJ} -o ${CMAKE_BINARY_DIR}/${FPGA_TARGET} + DEPENDS 
${DEVICE_IMAGE_FPGA_OBJ} ${HOST_FPGA_OBJ}) +endif() + +# fpga report +if(WIN32) + add_custom_target(report DEPENDS ${REPORTS_TARGET} ) + + separate_arguments(WIN_FLAGS WINDOWS_COMMAND) + add_custom_command(OUTPUT ${REPORTS_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} /EHsc ${CMAKE_CXX_FLAGS} ${WIN_FLAGS} ${HARDWARE_LINK_FLAGS} -fsycl-link ${CMAKE_CURRENT_SOURCE_DIR}/${DEVICE_SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${REPORTS_TARGET} + DEPENDS ${DEVICE_SOURCE_FILE} ${DEVICE_HEADER_FILE}) + +else() + add_custom_target(report DEPENDS ${REPORTS_TARGET} ) + + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${DEVICE_SOURCE_FILE} ${DEVICE_SOURCE_FILE} COPYONLY) + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${DEVICE_HEADER_FILE} ${DEVICE_HEADER_FILE} COPYONLY) + + add_custom_command(OUTPUT ${REPORTS_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS} -fsycl-link ${DEVICE_SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${REPORTS_TARGET} + DEPENDS ${DEVICE_SOURCE_FILE} ${DEVICE_HEADER_FILE}) +endif() + +# run +add_custom_target(run + COMMAND ../${TARGET_NAME}.fpga_emu + DEPENDS ${TARGET_NAME}.fpga_emu) + diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/src/build.ninja b/DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/src/build.ninja new file mode 100755 index 0000000000..619923b204 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/src/build.ninja @@ -0,0 +1,32 @@ +device_source_file = qrd.cpp +device_header_file = qrd.hpp +host_source_file = qrd_demo.cpp +target_name = qrd + +emulator_target = ${target_name}.fpga_emu.exe +report_target = ${target_name}_report.a +report_target_s10_pac = ${target_name}_s10_pac_report.a + +hardware_flags = -fintelfpga -Xshardware -Xsclock=330MHz -Xsfp-relaxed -Xsparallel=2 +emulator_flags = -fintelfpga -DFPGA_EMULATOR -Xsfast-emulator + +rule build_fpga_emu + command = dpcpp /GX ${emulator_flags} ${device_source_file} ${host_source_file} ${design_flags} -DFIXED_ITERATIONS=64 -DROWS_COMPONENT=128 
-DCOLS_COMPONENT=128 -o $out + +rule gen_report + command = dpcpp /GX ${hardware_flags} -Xsboard=intel_a10gx_pac:pac_a10 ${device_source_file} ${host_source_file} -DFIXED_ITERATIONS=64 -DROWS_COMPONENT=128 -DCOLS_COMPONENT=128 -Xsseed=5 -fsycl-link -o $out + +rule gen_report_s10_pac + command = dpcpp /GX ${hardware_flags} -Xsboard=intel_s10sx_pac:pac_s10 ${device_source_file} ${host_source_file} -DFIXED_ITERATIONS=105 -DROWS_COMPONENT=256 -DCOLS_COMPONENT=256 -Xsseed=1 -fsycl-link -o $out + +# FPGA emulator +build fpga_emu: phony ${emulator_target} +build ${emulator_target}: build_fpga_emu + +# report +build report: phony ${report_target} +build ${report_target}: gen_report + +# report (S10 PAC) +build report_s10_pac: phony ${report_target_s10_pac} +build ${report_target_s10_pac}: gen_report_s10_pac diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/src/qrd.cpp b/DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/src/qrd.cpp new file mode 100755 index 0000000000..a6d973cbaa --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/src/qrd.cpp @@ -0,0 +1,318 @@ +// ============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// +// This agreement shall be governed in all respects by the laws of the State of +// California and by the laws of the United States of America. + +#include +#include +#include +#include +#include + +#include "qrd.hpp" + +using std::vector; +using namespace sycl; + +template +struct Unroller { // Step calls action(begin), then hands off to the next Unroller instantiation + template + static void Step(const Action &action) { + action(begin); + Unroller::Step(action); + } +}; + +template +struct Unroller { // Terminating case: Step does nothing + template + static void Step(const Action &action) {} +}; + +struct MyComplex { // Pair of floats; presumably xx is the real part and yy the imaginary part + float xx; + float yy; + MyComplex(float x, float y) { + xx = x; + yy = y; + } + MyComplex() {} // NOTE: leaves xx and yy uninitialized + const MyComplex operator+(const MyComplex other) const { // Component-wise sum + return MyComplex(xx + other.xx, yy + other.yy); + } +}; + +MyComplex MulMycomplex(MyComplex a, MyComplex b) { // NOTE(review): with xx real and yy imaginary this is a times conj(b), not a plain complex product + MyComplex c; + c.xx = a.xx * b.xx + a.yy * b.yy; + c.yy = a.yy * b.xx - a.xx * b.yy; + return c; +} + +// Forward declare the kernel name +// (This will become unnecessary in a future compiler version.) 
+class QRD; + +void QRDecomposition(vector &in_matrix, vector &out_matrix, queue &q, + size_t matrices, size_t reps) { + // Number of complex elements in the matrix + constexpr int kNumComplexElements = COLS_COMPONENT * ROWS_COMPONENT; + + // Sizes of allocated memories for input and output matrix + constexpr int kInputMatrixSize = kNumComplexElements * 2; + constexpr int kOutputMatrixSize = + (ROWS_COMPONENT + 1) * COLS_COMPONENT * 3; + + // Constants related to the memory configuration of the kernel's local + // memories + // We want 4 complex elements (2 floating point values) in each memory bank + constexpr int kNumElementsPerBank = 4; + // Set the bankwidth in bytes + constexpr int kBankwidth = kNumElementsPerBank * 8; + constexpr int kNumBanks = ROWS_COMPONENT / kNumElementsPerBank; + + constexpr int kLoadIter = kNumComplexElements / kNumElementsPerBank; + constexpr int kStoreIter = kNumComplexElements / kNumElementsPerBank; + constexpr short kNumBuffers = 4; + + // We will process 'chunk' number of matrices in each run of the kernel + short chunk = 2048; + if (matrices % chunk) { + chunk = 1; + } + + // Create buffers and allocate space for them. 
+ buffer *input_matrix[kNumBuffers], *output_matrix[kNumBuffers]; + for (short i = 0; i < kNumBuffers; i++) { + input_matrix[i] = new buffer(kInputMatrixSize * chunk); + output_matrix[i] = new buffer(kOutputMatrixSize * chunk); + } + + for (size_t r = 0; r < reps; r++) { + for (size_t b = 0, it = 0; it < matrices; + it += chunk, b = (b + 1) % kNumBuffers) { + const float *kPtr = in_matrix.data() + kInputMatrixSize * it; + float *kPtr2 = out_matrix.data() + kOutputMatrixSize * it; + int matrices = chunk; + + q.submit([&](handler &h) { + auto in_matrix2 = + input_matrix[b]->get_access(h); + h.copy(kPtr, in_matrix2); + }); + + q.submit([&](handler &h) { + auto in_matrix = input_matrix[b]->get_access(h); + auto out_matrix = + output_matrix[b]->get_access(h); + auto out_matrix2 = out_matrix; + h.single_task([=]() [[intel::kernel_args_restrict]] { + for (int l = 0; l < matrices; l++) { + [[intelfpga::bankwidth(kBankwidth), + intelfpga::numbanks(kNumBanks)]] struct { + MyComplex d[ROWS_COMPONENT]; + } a_matrix[COLS_COMPONENT], ap_matrix[COLS_COMPONENT], + aload_matrix[COLS_COMPONENT]; + + MyComplex vector_ai[ROWS_COMPONENT], vector_ti[ROWS_COMPONENT]; + MyComplex s_or_i[COLS_COMPONENT]; + + // Copy data from DDR memory to on-chip memory. 
+ int idx = l * kNumComplexElements / kNumElementsPerBank; + for (short li = 0; li < kLoadIter; li++) { + MyComplex tmp[kNumElementsPerBank]; + Unroller<0, kNumElementsPerBank>::Step([&](int k) { + tmp[k].xx = in_matrix[idx * 2 * kNumElementsPerBank + k * 2]; + tmp[k].yy = + in_matrix[idx * 2 * kNumElementsPerBank + k * 2 + 1]; + }); + + idx++; + int jtmp = li % (kNumBanks); + + Unroller<0, kNumBanks>::Step([&](int k) { + Unroller<0, kNumElementsPerBank>::Step([&](int t) { + if (jtmp == k) { + aload_matrix[li / (kNumBanks)] + .d[k * kNumElementsPerBank + t].xx = tmp[t].xx; + aload_matrix[li / (kNumBanks)] + .d[k * kNumElementsPerBank + t].yy = tmp[t].yy; + } + + // Delay data signals to create a vine-based data distribution + // to lower signal fanout. + tmp[t].xx = intel::fpga_reg(tmp[t].xx); + tmp[t].yy = intel::fpga_reg(tmp[t].yy); + }); + + jtmp = intel::fpga_reg(jtmp); + }); + } + + float p_ii_x, i_r_ii_x; + short i = -1; + short j = N_VALUE - FIXED_ITERATIONS < 0 + ? (N_VALUE - FIXED_ITERATIONS) + : 0; + int qr_idx = l * kOutputMatrixSize / 2; + + [[intelfpga::ii(1)]] [[intelfpga::ivdep(FIXED_ITERATIONS)]] + for (int s = 0; s < ITERATIONS; s++) { + MyComplex vector_t[ROWS_COMPONENT]; + MyComplex sori[kNumBanks]; + + bool j_eq_i[kNumBanks], i_gt_0[kNumBanks], + i_ge_0_j_eq_i[kNumBanks], j_eq_i_plus_1[kNumBanks], + i_lt_0[kNumBanks]; + + Unroller<0, kNumBanks>::Step([&](int k) { + i_gt_0[k] = intel::fpga_reg(i > 0); + i_lt_0[k] = intel::fpga_reg(i < 0); + j_eq_i[k] = intel::fpga_reg(j == i); + i_ge_0_j_eq_i[k] = intel::fpga_reg(i >= 0 && j >= i); + j_eq_i_plus_1[k] = intel::fpga_reg(j == i + 1); + sori[k].xx = intel::fpga_reg(s_or_i[j].xx); + sori[k].yy = intel::fpga_reg(s_or_i[j].yy); + }); + + Unroller<0, ROWS_COMPONENT>::Step([&](int k) { + vector_t[k].xx = aload_matrix[j].d[k].xx; + vector_t[k].yy = aload_matrix[j].d[k].yy; + if (i_gt_0[k / kNumElementsPerBank]) { + vector_t[k].xx = a_matrix[j].d[k].xx; + vector_t[k].yy = a_matrix[j].d[k].yy; + } + if 
(j_eq_i[k / kNumElementsPerBank]) { + vector_ai[k].xx = vector_t[k].xx; + vector_ai[k].yy = vector_t[k].yy; + } + }); + + Unroller<0, ROWS_COMPONENT>::Step([&](int k) { + vector_t[k] = + MulMycomplex(vector_ai[k], + i_lt_0[k / kNumElementsPerBank] + ? MyComplex(0.0, 0.0) + : sori[k / kNumElementsPerBank]) + + (j_eq_i[k / kNumElementsPerBank] ? MyComplex(0.0, 0.0) + : vector_t[k]); + if (i_ge_0_j_eq_i[k / kNumElementsPerBank]) { + ap_matrix[j].d[k].xx = a_matrix[j].d[k].xx = + vector_t[k].xx; + ap_matrix[j].d[k].yy = a_matrix[j].d[k].yy = + vector_t[k].yy; + } + if (j_eq_i_plus_1[k / kNumElementsPerBank]) { + vector_ti[k] = vector_t[k]; + } + }); + + MyComplex p_ij = MyComplex(0, 0); + Unroller<0, ROWS_COMPONENT>::Step([&](int k) { + p_ij = p_ij + MulMycomplex(vector_t[k], vector_ti[k]); + }); + + if (j == i + 1) { + p_ii_x = p_ij.xx; + i_r_ii_x = rsqrt(p_ij.xx); + } + + MyComplex s_ij = + MyComplex(0.0f - (p_ij.xx) / p_ii_x, p_ij.yy / p_ii_x); + + if (j >= 0) { + s_or_i[j] = MyComplex(j == i + 1 ? i_r_ii_x : s_ij.xx, + j == i + 1 ? 0.0f : s_ij.yy); + } + + MyComplex r_ii = j == i + 1 ? MyComplex(sycl::sqrt(p_ii_x), 0.0) + : MyComplex(i_r_ii_x * p_ij.xx, + i_r_ii_x * p_ij.yy); + + if (j >= i + 1 && i + 1 < N_VALUE) { + out_matrix[qr_idx * 2] = r_ii.xx; + out_matrix[qr_idx * 2 + 1] = r_ii.yy; + qr_idx++; + } + + if (j == N_VALUE - 1) { + j = ((N_VALUE - FIXED_ITERATIONS) > i) + ? (i + 1) + : (N_VALUE - FIXED_ITERATIONS); + i++; + } else { + j++; + } + } + + qr_idx /= 4; + for (short si = 0; si < kStoreIter; si++) { + int desired = si % (kNumBanks); + bool get[kNumBanks]; + Unroller<0, kNumBanks>::Step([&](int k) { + get[k] = desired == k; + desired = intel::fpga_reg(desired); + }); + + MyComplex tmp[kNumElementsPerBank]; + Unroller<0, kNumBanks>::Step([&](int t) { + Unroller<0, kNumElementsPerBank>::Step([&](int k) { + tmp[k].xx = get[t] ? ap_matrix[si / (kNumBanks)] + .d[t * kNumElementsPerBank + k] + .xx + : intel::fpga_reg(tmp[k].xx); + tmp[k].yy = get[t] ? 
ap_matrix[si / (kNumBanks)] + .d[t * kNumElementsPerBank + k] + .yy + : intel::fpga_reg(tmp[k].yy); + }); + }); + + Unroller<0, 4>::Step([&](int k) { + out_matrix2[qr_idx * 2 * kNumElementsPerBank + k * 2] = + tmp[k].xx; + out_matrix2[qr_idx * 2 * kNumElementsPerBank + k * 2 + 1] = + tmp[k].yy; + }); + + qr_idx++; + } + } + }); + }); + + q.submit([&](handler &h) { + auto final_matrix = output_matrix[b]->get_access(h); + h.copy(final_matrix, kPtr2); + }); + } + } + + for (short b = 0; b < kNumBuffers; b++) { + delete input_matrix[b]; + delete output_matrix[b]; + } +} diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/src/qrd.hpp b/DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/src/qrd.hpp new file mode 100755 index 0000000000..4ada530ea7 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/src/qrd.hpp @@ -0,0 +1,43 @@ +// ============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// +// This agreement shall be governed in all respects by the laws of the State of +// California and by the laws of the United States of America. + +// The values for FIXED_ITERATIONS, ROWS_COMPONENT and COLS_COMPONENT will be +// supplied by the build system (cmake/build.ninja) + +// Architecture/Design Parameters used to implement the triangular loop +// structure of the design. See the tutorial on triangular loop optimization +// for more details. +#define N_VALUE COLS_COMPONENT + +#define M_MINUS_COLS \ + (FIXED_ITERATIONS > COLS_COMPONENT ? FIXED_ITERATIONS - COLS_COMPONENT : 0) + +#define ITERATIONS \ + (COLS_COMPONENT + M_MINUS_COLS + (COLS_COMPONENT + 1) * COLS_COMPONENT / 2 + \ + FIXED_ITERATIONS * (FIXED_ITERATIONS - 1) / 2 - \ + M_MINUS_COLS * (M_MINUS_COLS - 1) / 2) diff --git a/DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/src/qrd_demo.cpp b/DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/src/qrd_demo.cpp new file mode 100755 index 0000000000..4bee78a672 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/src/qrd_demo.cpp @@ -0,0 +1,233 @@ +// ============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so,
subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// +// This agreement shall be governed in all respects by the laws of the State of +// California and by the laws of the United States of America. + +#include + +#include +#include +#include +#include + +#include "dpc_common.hpp" +#include "qrd.hpp" + +using namespace std; +using namespace std::chrono; +using namespace sycl; + +// Run the modified Gram-Schmidt QR Decomposition algorithm on the given +// matrices. The function will do the following: +// 1. Transfer the input matrices to the FPGA. +// 2. Run the algorithm. +// 3. Copy the output data back to host device. +// The above process is carried out 'reps' number of times. +void QRDecomposition(vector &in_matrix, vector &out_matrix, queue &q, + size_t matrices, size_t reps); + +int main(int argc, char *argv[]) { + constexpr size_t kRandomSeed = 1138; + constexpr size_t kRandomMin = 1; + constexpr size_t kRandomMax = 10; + + size_t matrices = argc > 1 ? 
atoi(argv[1]) : 1; + if (matrices < 1) { + cout << "Must run at least 1 matrix\n"; + return 1; + } + + try { +#if defined(FPGA_EMULATOR) + intel::fpga_emulator_selector device_selector; +#else + intel::fpga_selector device_selector; +#endif + + queue q = queue(device_selector, dpc_common::exception_handler); + device device = q.get_device(); + cout << "Device name: " << device.get_info().c_str() + << "\n"; + + vector a_matrix; + vector qr_matrix; + + constexpr size_t kAMatrixSizeFactor = ROWS_COMPONENT * COLS_COMPONENT * 2; + constexpr size_t kQRMatrixSizeFactor = + (ROWS_COMPONENT + 1) * COLS_COMPONENT * 3; + constexpr size_t kIndexAccessFactor = 2; + + a_matrix.resize(matrices * kAMatrixSizeFactor); + qr_matrix.resize(matrices * kQRMatrixSizeFactor); + + // For output-postprocessing + float q_matrix[ROWS_COMPONENT][COLS_COMPONENT][2]; + float r_matrix[COLS_COMPONENT][COLS_COMPONENT][2]; + + cout << "Generating " << matrices << " random matri" + << ((matrices == 1) ? "x " : "ces ") << "\n"; + + srand(kRandomSeed); + + for (size_t i = 0; i < matrices; i++) { + for (size_t row = 0; row < ROWS_COMPONENT; row++) { + for (size_t col = 0; col < COLS_COMPONENT; col++) { + int random_val = rand(); + float random_double = + random_val % (kRandomMax - kRandomMin) + kRandomMin; + a_matrix[i * kAMatrixSizeFactor + + col * ROWS_COMPONENT * kIndexAccessFactor + + row * kIndexAccessFactor] = random_double; + int random_val_imag = rand(); + random_double = + random_val_imag % (kRandomMax - kRandomMin) + kRandomMin; + a_matrix[i * kAMatrixSizeFactor + + col * ROWS_COMPONENT * kIndexAccessFactor + + row * kIndexAccessFactor + 1] = random_double; + } + } + } + + QRDecomposition(a_matrix, qr_matrix, q, 1, 1); // Accelerator warmup + +#if defined(FPGA_EMULATOR) + size_t reps = 2; +#else + size_t reps = 32; +#endif + cout << "Running QR decomposition of " << matrices << " matri" + << ((matrices == 1) ? "x " : "ces ") + << ((reps > 1) ? 
"repeatedly" : "") << "\n"; + + high_resolution_clock::time_point start_time = high_resolution_clock::now(); + QRDecomposition(a_matrix, qr_matrix, q, matrices, reps); + high_resolution_clock::time_point end_time = high_resolution_clock::now(); + duration diff = end_time - start_time; + q.throw_asynchronous(); + + cout << " Total duration: " << diff.count() << " s" + << "\n"; + cout << "Throughput: " << reps * matrices / diff.count() / 1000 + << "k matrices/s" + << "\n"; + + list to_check; + // We will check at least matrix 0 + to_check.push_back(0); + // Spot check the last and the middle one + if (matrices > 2) to_check.push_back(matrices / 2); + if (matrices > 1) to_check.push_back(matrices - 1); + + cout << "Verifying results on matrix"; + + for (size_t matrix : to_check) { + cout << " " << matrix; + size_t idx = 0; + for (size_t i = 0; i < COLS_COMPONENT; i++) { + for (size_t j = 0; j < COLS_COMPONENT; j++) { + if (j < i) + r_matrix[i][j][0] = r_matrix[i][j][1] = 0; + else { + r_matrix[i][j][0] = qr_matrix[matrix * kQRMatrixSizeFactor + idx++]; + r_matrix[i][j][1] = qr_matrix[matrix * kQRMatrixSizeFactor + idx++]; + } + } + } + + for (size_t j = 0; j < COLS_COMPONENT; j++) { + for (size_t i = 0; i < ROWS_COMPONENT; i++) { + q_matrix[i][j][0] = qr_matrix[matrix * kQRMatrixSizeFactor + idx++]; + q_matrix[i][j][1] = qr_matrix[matrix * kQRMatrixSizeFactor + idx++]; + } + } + + float acc_real = 0; + float acc_imag = 0; + float v_matrix[ROWS_COMPONENT][COLS_COMPONENT][2] = {{{0}}}; + for (size_t i = 0; i < ROWS_COMPONENT; i++) { + for (size_t j = 0; j < COLS_COMPONENT; j++) { + acc_real = 0; + acc_imag = 0; + for (size_t k = 0; k < COLS_COMPONENT; k++) { + acc_real += q_matrix[i][k][0] * r_matrix[k][j][0] - + q_matrix[i][k][1] * r_matrix[k][j][1]; + acc_imag += q_matrix[i][k][0] * r_matrix[k][j][1] + + q_matrix[i][k][1] * r_matrix[k][j][0]; + } + v_matrix[i][j][0] = acc_real; + v_matrix[i][j][1] = acc_imag; + } + } + + float error = 0; + size_t count = 0; + 
constexpr float kErrorThreshold = 1e-4; + for (size_t row = 0; row < ROWS_COMPONENT; row++) { + for (size_t col = 0; col < COLS_COMPONENT; col++) { + if (std::isnan(v_matrix[row][col][0]) || + std::isnan(v_matrix[row][col][1])) { + count++; + } + float real = v_matrix[row][col][0] - + a_matrix[matrix * kAMatrixSizeFactor + + col * ROWS_COMPONENT * kIndexAccessFactor + + row * kIndexAccessFactor]; + float imag = v_matrix[row][col][1] - + a_matrix[matrix * kAMatrixSizeFactor + + col * ROWS_COMPONENT * kIndexAccessFactor + + row * kIndexAccessFactor + 1]; + if (sqrt(real * real + imag * imag) >= kErrorThreshold) { + error += sqrt(real * real + imag * imag); + count++; + } + } + } + + if (count > 0) { + cout << "\nFAILED\n"; + cout << "\n" + << "!!!!!!!!!!!!!! Error = " << error << " in " << count << " / " + << ROWS_COMPONENT * COLS_COMPONENT << "\n"; + return 1; + } + } + + cout << "\nPASSED\n"; + return 0; + + } catch (sycl::exception const &e) { + cout << "Caught a synchronous SYCL exception: " << e.what() << "\n"; + cout << " If you are targeting an FPGA hardware, " + "ensure that your system is plugged to an FPGA board that is " + "set up correctly" + << "\n"; + cout << " If you are targeting the FPGA emulator, compile with " + "-DFPGA_EMULATOR" + << "\n"; + + terminate(); + } +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/CMakeLists.txt new file mode 100755 index 0000000000..5c0cea463c --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/CMakeLists.txt @@ -0,0 +1,11 @@ +set(CMAKE_CXX_COMPILER "dpcpp") + +cmake_minimum_required (VERSION 2.8) + +project(DoubleBuffering) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +add_subdirectory (src) diff --git 
a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/License.txt b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/License.txt new file mode 100755 index 0000000000..e63c6e13dc --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/License.txt @@ -0,0 +1,7 @@ +Copyright Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/README.md b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/README.md new file mode 100755 index 0000000000..31b7e3df37 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/README.md @@ -0,0 +1,223 @@ +# Double Buffering to Overlap Kernel Execution with Buffer Transfers and Host Processing +This FPGA tutorial demonstrates how to parallelize host-side processing and buffer transfers between host and device with kernel execution, which can improve overall application performance. + +***Documentation***: The [oneAPI DPC++ FPGA Optimization Guide](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide) provides comprehensive instructions for targeting FPGAs through DPC++. The [oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide) is a general resource for target-independent DPC++ programming. + +| Optimized for | Description +--- |--- +| OS | Linux* Ubuntu* 18.04; Windows* 10 +| Hardware | Intel® Programmable Acceleration Card (PAC) with Intel Arria® 10 GX FPGA;
Intel® Programmable Acceleration Card (PAC) with Intel Stratix® 10 SX FPGA +| Software | Intel® oneAPI DPC++ Compiler (Beta)
Intel® FPGA Add-On for oneAPI Base Toolkit +| What you will learn | How and when to implement the double buffering optimization technique +| Time to complete | 30 minutes + +_Notice: Limited support in Windows*; compiling for FPGA hardware is not supported in Windows*_ + +## Purpose +In an application where the FPGA kernel is executed multiple times, the host must perform the following processing and buffer transfers before each kernel invocation. +1. The output data from the *previous* invocation must be transferred from device to host and then processed by the host. Examples of this processing include: + * Copying the data to another location + * Rearranging the data + * Verifying it in some way +2. The input data for the *next* invocation must be processed by the host and then transferred to the device. Examples of this processing include: + * Copying the data from another location + * Rearranging the data for kernel consumption + * Generating the data in some way + +Without double buffering, host processing and buffer transfers occur *between* kernel executions. Therefore, there is a gap in time between kernel executions, which you can refer to as kernel *downtime* (see diagram below). If these operations overlap with kernel execution, the kernels can execute back-to-back with minimal downtime, thereby increasing overall application performance. + +### Determining When Double Buffering Is Possible + +Let's define the required variables: +* **R** = Time to transfer the kernel's output buffer from device to host. +* **Op** = Host-side processing time of kernel output data (*output processing*) +* **Ip** = Host-side processing time for kernel input data (*input processing*) +* **W** = Time to transfer the kernel's input buffer from host to device. +* **K** = Kernel execution time + +![](downtime.png) + +In general, **R**, **Op**, **Ip**, and **W** operations must all complete before the next kernel is launched.
To maximize performance, while one kernel is executing on the device, these operations should execute simultaneously on the host and operate on a second set of buffer locations. They should complete before the current kernel completes, thus allowing the next kernel to be launched immediately with no downtime. In general, to maximize performance, the host must launch a new kernel every **K**. + +This leads to the following constraint: + +```c++ +R + Op + Ip + W <= K, in order to minimize kernel downtime. +``` +If the above constraint is not satisfied, a performance improvement may still be observed because *some* overlap (perhaps not complete overlap) is still possible. Further improvement is possible by extending the double buffering concept to N-way buffering (see the corresponding tutorial). + +### Measuring the Impact of Double Buffering + +You must get a sense of the kernel downtime to identify the degree to which this technique can help improve performance. + +This can be done by querying the total kernel execution time from the runtime and comparing it to the overall application execution time. In an application where kernels execute with minimal downtime, these two numbers will be close. However, if kernels have a lot of downtime, overall execution time will notably exceed kernel execution time. The tutorial code exemplifies how to do this. + +### Tutorial Implementation Notes + +The basic idea is to: +1. Perform the input processing for the first two kernel executions and queue them both. +2. Immediately call the `process_output()` method for the first kernel; because of the implicit data dependency, the SYCL* runtime automatically blocks this call until the first kernel completes. +3. When the first kernel completes, the second kernel begins executing immediately because it was already queued. +4. While the second kernel runs, the host processes the output data from the first kernel and prepares the input data for the third kernel. +5.
As long as the above operations complete before the second kernel completes, the third kernel is queued early enough to allow it to be launched immediately after the second kernel. + +The process then repeats. + +The impact of double buffering on the total runtime of the tutorial program will be analyzed in the "Running the Sample" section below. + +## Key Concepts +* The double buffering optimization technique +* Determining when double buffering is beneficial +* How to measure the impact of double buffering + +## License +This code sample is licensed under MIT license. + + +## Building the `double_buffering` Tutorial + +### Include Files +The included header `dpc_common.hpp` is located at `%ONEAPI_ROOT%\dev-utilities\latest\include` on your development system. + +### Running Samples in DevCloud +If running a sample in the Intel DevCloud, remember that you must specify the compute node (fpga_compile or fpga_runtime) as well as whether to run in batch or interactive mode. For more information see the Intel® oneAPI Base Toolkit Get Started Guide ([https://devcloud.intel.com/oneapi/get-started/base-toolkit/](https://devcloud.intel.com/oneapi/get-started/base-toolkit/)). + +When compiling for FPGA hardware, it is recommended to increase the job timeout to 12h. + +### On a Linux* System + +1. Generate the `Makefile` by running `cmake`. + ``` + mkdir build + cd build + ``` + To compile for the Intel® PAC with Intel Arria® 10 GX FPGA, run `cmake` using the command: + ``` + cmake .. + ``` + Alternatively, to compile for the Intel® PAC with Intel Stratix® 10 SX FPGA, run `cmake` using the command: + + ``` + cmake .. -DFPGA_BOARD=intel_s10sx_pac:pac_s10 + ``` + +2. Compile the design through the generated `Makefile`. 
The following build targets are provided, matching the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + make fpga_emu + ``` + * Generate the optimization report: + ``` + make report + ``` + * Compile for FPGA hardware (longer compile time, targets FPGA device): + ``` + make fpga + ``` +3. (Optional) As the above hardware compile may take several hours to complete, an Intel® PAC with Intel Arria® 10 GX FPGA precompiled binary can be downloaded here. + +### On a Windows* System +Note: `cmake` is not yet supported on Windows. A build.ninja file is provided instead. + +1. Enter the source file directory. + ``` + cd src + ``` + +2. Compile the design. The following build targets are provided, matching the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + ninja fpga_emu + ``` + + * Generate the optimization report: + + ``` + ninja report + ``` + If you are targeting Intel® PAC with Intel Stratix® 10 SX FPGA, instead use: + ``` + ninja report_s10_pac + ``` + * Compiling for FPGA hardware is not yet supported on Windows. + + ### In Third-Party Integrated Development Environments (IDEs) + +You can compile and run this tutorial in the Eclipse* IDE (in Linux*) and the Visual Studio* IDE (in Windows*). For instructions, refer to the following link: [Intel® oneAPI DPC++ FPGA Workflows on Third-Party IDEs](https://software.intel.com/en-us/articles/intel-oneapi-dpcpp-fpga-workflow-on-ide) + +## Examining the Reports +Locate `report.html` in the `double_buffering_report.prj/reports/` or `double_buffering_s10_pac_report.prj/reports/` directory. Open the report in any of Chrome*, Firefox*, Edge*, or Internet Explorer*. + +Note that because the optimization described in this tutorial takes place at the *runtime* level, the FPGA compiler report will not show a difference between the optimized and unoptimized cases. + + +## Running the Sample + + 1. 
Run the sample on the FPGA emulator (the kernel executes on the CPU): + ``` + ./double_buffering.fpga_emu (Linux) + double_buffering.fpga_emu.exe (Windows) + ``` +2. Run the sample on the FPGA device: + ``` + ./double_buffering.fpga (Linux) + ``` + +### Example of Output + +``` +Platform name: Intel(R) FPGA SDK for OpenCL(TM) +Device name: pac_a10 : Intel PAC Platform (pac_ee00000) + + +Executing kernel 100 times in each round. + +*** Beginning execution, without double buffering +Launching kernel #0 +Launching kernel #10 +Launching kernel #20 +Launching kernel #30 +Launching kernel #40 +Launching kernel #50 +Launching kernel #60 +Launching kernel #70 +Launching kernel #80 +Launching kernel #90 + +Overall execution time without double buffering = 29742 ms +Total kernel-only execution time without double buffering = 17856 ms +Throughput = 35.255249 MB/s + + +*** Beginning execution, with double buffering. +Launching kernel #0 +Launching kernel #10 +Launching kernel #20 +Launching kernel #30 +Launching kernel #40 +Launching kernel #50 +Launching kernel #60 +Launching kernel #70 +Launching kernel #80 +Launching kernel #90 + +Overall execution time with double buffering = 17967 ms +Total kernel-only execution time with double buffering = 17869 ms +Throughput = 58.35976 MB/s + + +Verification PASSED +``` + +### Discussion of Results + +A test compile of this tutorial design achieved a maximum frequency (fMAX) of approximately 340 MHz on the Intel® Programmable Acceleration Card with Intel® Arria® 10 GX FPGA. The results with and without double buffering are shown in the following table: + +Configuration | Overall Execution Time (ms) | Total Kernel Execution time (ms) +-|-|- +Without double buffering | 23462 | 15187 +With double buffering | 15145 | 15034 + +In both runs, the total kernel execution time is similar, as expected. 
However, without double buffering, the overall execution time notably exceeds the total kernel execution time, implying there is downtime between kernel executions. With double buffering, the overall execution time is close to the total kernel execution time. diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/double_buffering.sln b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/double_buffering.sln new file mode 100755 index 0000000000..4108b65da8 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/double_buffering.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.28307.705 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "double_buffering", "double_buffering.vcxproj", "{6910A54A-BFE5-462F-9F3B-B84F62C5ADD1}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {6910A54A-BFE5-462F-9F3B-B84F62C5ADD1}.Debug|x64.ActiveCfg = Debug|x64 + {6910A54A-BFE5-462F-9F3B-B84F62C5ADD1}.Debug|x64.Build.0 = Debug|x64 + {6910A54A-BFE5-462F-9F3B-B84F62C5ADD1}.Release|x64.ActiveCfg = Release|x64 + {6910A54A-BFE5-462F-9F3B-B84F62C5ADD1}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {1878B8F8-3C90-4CB5-9A71-66501FA4A3BA} + EndGlobalSection +EndGlobal diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/double_buffering.vcxproj b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/double_buffering.vcxproj new file mode 100755 index 0000000000..b7ee382578 --- /dev/null +++
b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/double_buffering.vcxproj @@ -0,0 +1,160 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + + + + + + 15.0 + {6910a54a-bfe5-462f-9f3b-b84f62c5add1} + Win32Proj + double_buffering + $(WindowsSDKVersion.Replace("\","")) + + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + + + + + + + + + + + + + + + + + + + true + + + true + + + false + + + false + + + + Use + Level3 + Disabled + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + Disabled + true + true + pch.h + true + -DFPGA_EMULATOR %(AdditionalOptions) + $(IntDir)double_buffering.obj + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + true + -DFPGA_EMULATOR %(AdditionalOptions) + $(IntDir)double_buffering.obj + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + + \ No newline at end of file diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/downtime.png b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/downtime.png new file mode 100755 index 0000000000000000000000000000000000000000..2a306929bc0a2ae8b326e1156ab8a0135cd9cc7c GIT binary patch literal 11112 zcmcI~1yodR_b)1<5+a}=0-_+@-Jmkm&_hV~fV8xdDuRq42oBAFFm#7>iG<_~2&hAc zbV*3V+%tgh``-KizW@KOb-&A6z&U4~v!C7j_w4<^G}IJG2yYSM;o*@eDS{w)c=(mT zb@Ao%z+cy1nhoIJIX8&HL%c#1?E-LvZ}UL)0UlmS1ks`SMd1F*6GeSDJiM#TxZiVM zoU^U)@Ys)(Ko7K^nyes9LyW#7=4c!kn$$RF(4zXnEhfV97B6YhfmVHrjC?c{E94Y3 
z5iH5jhXZ->;ht^E={IS)*27-<{0z2T%e|uf%F;pVqrAmbJ9QXa!RYOenRn}N6AUA% zBm{qKDG&HxP7sl@3)o54noHiS-T9W{qpm~e5g_f$TVPn}bhJ27gq9j_(3#0G1Q^Br z7%Rd~FEs9h@lURvpNBAa=?UvKJ%-V1A@IpWHR@dR-VPjD27oLz zHKct(vt+#cPRA^3o&_eYLYM-}RltB?z zndmpyqRf-bC(+)W?es7sEJ~bSy%WTqN^Hs|D?}_FAHrZq04^T~r4x_4cWRIXsxha9 zbg}|kLcRM=UN6cgQhBw*N9E1K8k^H{>e^zI| z0F9jxlg(@~{BB+EKw)(cYiG{p@RaJV@;QF9%?uDMWc0|VdZ6S${XAr9#X3T4{iu>B zAu${+nd$Ty49y?ddkSHOT!HR_|0qkqDEEk&0lJ&16>N9You5ojyRDPE$t_qFc6_n< z&X0WK%RI3cAKkKF)3GSaXnCSXY5hoe#Q58{q8`d-ezRFxlJu70o*J<})eutpy+Czo z-~PrcOYlj^+_X}>A`^U)<1agf{1-cMTXH_BYjd|Btt2YHYVY>+4}sGzK*ePOYIhL9#P7QNO={w?Dr&;4>YmrqG0@2I<;A8 z;;+4i$g3bCSfC8i8o@>I)LR4y6%d&aW6qE>10>k_FHeKEER##x%){p!%LbGc{Au~U zQ`>8m$sU3`tU}$tx-K%{-!b%b8GDQ(7npUm&m*a-u9QPMn34dNywm@0kkGBxHcdU?CRLy`y zzulw6;;qsf}8o-WvR0xTY}niwS~ z@AaU<^O?Wy>cKpR(`#Mfo9{Y!P$rRNankynt>4r-uSHiZ$D1x4yJ^g@o`tu7Q|=J~ z9M&$n2UDc&F}nBqPaJ*##8C@`&j+QfDl8Z!z0=kLg{~IN90}yUhf(<)B7%6|bQi|$ zUiTGIaiI;Eo1y^)A`V{t$c4vgnWkpaVsqDXOKWw)^`4m>Wr_Kv9<-r;+qX{FI8cTs zi!h6g75Ia@d9;I5GK?17^!41McO`bZJIYVx9ZkyC^P8?4qBvlm##t_pifN5|^@{F= zW-&7+Okm9MRm73YP$AI@xz&2ML&py{_jwtEJTAZAZK*3riPxOS&1}gS!>3RlkbH@m zg=9+M1A7UP>?RM~fq)c^g)8n6UzLFsIgUyZuX~9P4ex zYmpqMe)z2i7HcAj+y+cng)^O+g$pJ31NI7=E@V#5f`TdE!A~sh?5HXU0f-AVBw6G3 zga7dR=eksmryiv;zeK8Q82*a--kfb34Q2*QLcsUl{KNN7_(#IC$M@&cV#9+b3(VEB z)tG-%=W};xr-Rn+WQd6C;eq7DyJH@m2Rk)F^GIKh$atppUb8(G(Q@e8hJ@$ zKHdq}tMQQwrcXL48EJ_WU28=X5jM^2fjC3ZaHA49~*X>a;cj~br<;9L*h7odYp<>8c5*!Fiv}-&EW#DJ#BZD!$ixZ8FXsfo zF!tuEcfD_|nuH1&SY9dRh#L&p6xXmqB4u->#W1a1{zx^L4%HQoU(F_bpjKpJ*2Ejm z+XFrc9=aJIx}UFV4D%Y_v;ndxtG|bf-Wp7rcP8^cLP0B8wsml+=#q%UIF-tp*pO`< zKgG;7JLRB2F4PA7h$_Z(CvO$9av%CCw^ct;!RxANu+N>nCeBMoT&i~0{;nH>gZ>vf zK~^f34CF`gEGVOUf9!FR!?N_j#Qb3g%-wH(@Rk|*s%H{940hZDgSW0iz%a|Yn41Ge ziIH{tfp*RAE1lcn{Y$oXi}Gjax4J&t)S2r_k9EsFsgE=O?4?j|*wpb&LFym7~eZlz$}!v-PR#j;?iY zA>l+fTSe>^Cxxu(Hzm{Y zcVR*IPvFu^T=fVQKigM%>;i_OYO8NeWTk7$9#GFR-e>7Ctm|&`jjmuFK8gy~{F7u8 z$h1~OKB$1&K+Ft$`~NUR6;MpFY{A4E>P|P~>+mc&gI-OR>ieN`z_4Q!L@#Ac^mjO~ 
zkWET!Lcpuys3F39&72ofyII}DaE;2Wuf2h!%3 zQvq$vs=S!Px_hpIL_oN8S(rIrv7ej6jZ+b8#55(PH{+_6W)(arblEL4fGAnwK`qx< z4bWfeTt2$E^^6)LvpLvNOIL5OXcKs@4+xluRR@jWYNK~DHN$bnn9G&KStF0m@KCVw zsiCyuLt1{gMUH5mK4-VYA(gjU7HpS-&oQegNHJ=}8E(-T-^%F{i)Q1ilYf*!Zvcip zqDgN8WLq$VmGFZWK7-E{?KIU}ZmYrk)lh<7%OaesH%K8CG()~Siy^Vqek&B9w~Z|{s9qmEcJbD$>{B@GjXu~vmw&7| z(BB>5>)2CNtq#5}Oc}~*SeJk)uKLbLU+;jFB&(In0*O71{6hLzhWQ@rmYR^hNbxK&Nh!?OB^CbHQE&ZE5ega z|349)FQC5H@XG~vnZidO%!2v?B#!N=(7A?vtbs_>m-qaUVAkeGf0j#F*IW@HV`~;c z{6Sj$qMlchWx=n$y(3@u)E)W@RPvNv3s+e9=_O0?Sxd+munDekOgRD2?XEw^efJ9+v00=8ZF~pc_TP?UN*IIqJx6}qTHmED z=OLGoHhlc{1&je_MKcss{{%Qo2A3}w56IH{fe zL7q`JXF}+R8#*>V=*d7Q$bGlB=}Y4gextYi>6Z5~Gj2n5m+4`RQMKz#`*+e@X8ExB z^>~@FTIcZ=s&f|NU6Dnr)!0w63Ip4G2fHQH>e72w48ZpLWLr6+9>AU@>Hj+nKG_9& zHQlEII#p7-*l|5(ukm;-H-ZfLbY@OQ%y-_5kV62s`&CCH4mQ{5uwC@%yu`NIDV{=F zQFlH6qtnfFZhXjwfC;;lIyD0YnG@R)to;G_ReJTa&Dp*+|2cp`6Tg*k|FwqAQ6#xOy53)BMF3)IV1n3d%Bd$^6)92{$MMao zO`!<^=&OLLUXk#m0d(J0(X47`y*lRti+bZ$z%uiXfp#J;p31s;D>=n;r>{J$@_l@d z$%#w_EEsjeeCo7c6m@?_c4P{;e~G(ie@7uOH_LUtNGSc&m+t|}(kaxFWdPu??+VKXU z@UpJ=r>x>7U{wc?ZgCYR{E?|r-3xfn3y6Uh)*WQhjlF{-%BK3RE=6hlY%{-S-qz20 zjv2|N!l{oY4&uLNf| zimN)Y&If8J?#5#Cs2fG3b?mam2nKbpMvtz44$hU~;g>MWItU5f6^Tb(_Lk!(wXO=DiR<29UGY21GUYaN<-Ju+PJ{3t5I7usVJw9CB3(~ zLPLr4`8N7HA(T{I5%Z?X?h(-|%n?pbE7EUhn?~)F7b&k4>)ZvzNmnhggDFH<$_G?w z4v^wm%KJxdayW2{LgTr{Er{LBc0*ILs-zF7`na=v-j6*lf^@2Wx1KBX;I_J}vE3Kn z|Ff+I{$sSqIA)7pjp`50w~bpcxd_X)XQI+q@r$c@1p?D~I&AlX?=(^$Vw$nliR3;j zGEc$XnXwtD&ae3nruqpDPq+@Us)c-8tl1*1E}P*rovvD3wNQxO6}^TR_I{Pe_198> zSx#cphSiKeW)RI>_Iu2<-E00JTx+kX?z^DOfKYs?_0a{nCDIv{u@)st=0K z3kY%6ckJF^Iw&$`f0joJ@PudCbU+e4Zn{accwEZZ*t&0*j4kVN$T(t}?!jMMG@_Y6 z>rlJik4KcIlv*s4EzeLoDPX@eHry+(-#X zSQg;_$s+Yzy^?b`IkFlSaRF$sI=JGz+}6@nd%bd#7f?acEOtZ{R=-Uz$z+X*IDNZw!F8!-EZOD zr@{4p9~By5>X$gOQj+U8HdBnS6K@S0k8E1na0PJiyU9Dxp~kfxbmL*EBmWSwrE=Ef z)BGhtyyr3R$nV-($&~|5*Zd6Ln$nNnQzcmU?SLcoAz}q~MDTRq8t~RO@Ro6*Y2z`j zM=KY^xH0SOUmh^N*7*CI;YaFlZE=z>$c2wPV?O9)&^bJV``}mZVlGn$Ht56qh>K*5 
z`&}7xUTZZG!p)kvh2d&GKMhHT-?QS~CjwaKeoabOw?9O*cs=B1U&Of2%If$Lr(-ZO zMy+|B72&xiWm>_aiT9jRg`266Q#l=1uW5vh z{oYeKEQMXZ;_-e>K%GvY*4TSJtmX=0xWey^aNc{hP*#L4izDSDY6c(ji+GQlo@goA zpa^cd?0*g(6~DaH)juks*27uU5pyN8<@Z}uP{u<1MXyhPwKdU?LGQ?=O0|p^4qs2* z31zQL+XwbHIf=cGf)yPfuplUbZfW_&w;%P5pzvj->v$h7%BnG$1ji@TT8kJ6Q`*#s z(CGo`(U5D5&ao~(Tz2e%N}PKOOq~WH#zPx}h|GWNg&@XMf`|{A%qIJz08UJz}ZoUHz^IzKkySKb^@pvL00X zXPwEJnjp+Eebkm+c0=-lg1#1{=*f}}P3BB~u|-IxP0ZF8x>1Ubvd8P7?)EK3*!DDh zTeoDb$r-xv{YDwDl$WE$LM0g{r_lXaw!}$uMpyfPZ=V|WDrL!O2P->1V8j`p@6`Ce zv|u|Z$QqHuA+hyVRGe2V!`??Fm|H74Xx}aB?%1e(qt};!eutE9-Dn>ul4{a7P=PbN zdSE?#{D4ei5mI0i5qPk6j=N&m*!jY#Pgu;Pfps-YW1`;XX9ojq&@-RlJM)R&Q=cHo zjLLofb7Qc3!$M^F#zIC2w~0GvYig60#oK!6^?ACwFHSR|uUp@feAbg&CE=Ef$@iT) zZ4GKRri%0u`5!5CM3m>eKecEV&JD?NMh%NQGpfazQ9qm-H9>_*Zfeor9}T?;gUBT> zj2Vo{AjPkeY*m9?JX@_QCWveg^=^D!sz*wX^({L^x5lO*O~#^H%0qVXemRs!Vd%`JN%$&Ge1f3d`a@=!_U_jw#AqC5a(F$}=B>)=%u7BY{|mfEQRev?Y;K z;fr8It&dLf09rHqb__U|pu!3MIWO`kJc;ns+I9=s(oyeY_P(`BcjvBw65cu?VwORd#~GLG`c`xaEn69Nf3_Lq6^X|X zu11o7FXBRRRk17?#*-_1(OZ4^${;ZqjS67>#+8ek%&frtPRQf@sZI6FpTtl6=|Ne} zJz_QPcy}I?i&WpHQ4sjF}Z5B85W%^GU(Z<9+0-#H<~6`oJWXSxg(vKbFe|^ zqa&EOp+WBIZC}&J+W{$FBGKn_CvN*?9&~)#S4L`Ku*+jkF|RPy#n%tju}jGzO||7u z#c!ZBuNq1sI^mC#k(fHlI`eYVaMj?`+^ScdO^zFdSswspVzbhc6k2uWJd==9=aKI| zbsk^#J~3VuRr7mG@wK0e)7R!1G>ofmi8AHp1^w0yW<|VF{;SI}*54uI#R+xIpgFw5 zq&w^kC;!`dT7di*pJ??W<8*R#>Zm1WYXx`xSxeJJ?b7XN&(4%@G=p9`sDBwJ3ol|J25NE9Bz=2Wizh6QT6`?Ej{aozINwk;7j0miW6=olXybwUYWpYo0ML zuXM&d>k0Gcv^r*A!mEUqYqQByo-M{jzrUp{e?TG!Ed-{{1A-5UYquwqx0&u7A05^ zq>TVJY$4~huB3NeFdg0;!`5e`0z$~@*ZT(1%XOiDL8akp%h#v$aTD%CpR&1REd2>o z;*i01JUVV}%<6eE@YPoIc1C}=@ATbMt>C829v;7cHC=iZK^cx>xqW~}OCS6LRV_@( z{dqHLcFPny2*Yk?Epmq3Y~92jd{Os3Z~^ci7Y_e<00`3uhcL7Jcef&o=y6c(kmR-{ z+lu>EqvS?=z134p37l`3Y7N~P+nlMD-i%jw92OR`SgqPbsjvND z!$({O^BP*=FP>(TtuiLC z(qoq(!TUgx4pKz?-=0{&hpK!eE-8%COWzEe|A^lea`}Qp6MsuVzbzgf7tntB+8%#E z+PK&LRF@Qro&T|(rH)7=wq$^YoPP2Y);iD$e8a&E39f?SR+j=+kByKy-aR>xV1Iet z{$RUte=!eku98ly$*yl~T>kcJ`1K)`pG@1TSz&sP%)F^$6l}61a0CI~wTrmJ1Uu?U 
zqGAEM2-PpkxyD(56HvZ#0Sz#yzR_zoNwXef2j0@hxm^BPm4Mu9r7Wo1sOn8H5Bw<|9vDpHpWshB<7RJCEo z_?~cV&7?>rW}mw`j~mbfZ(DNZMtRHIb>VJt(iJxVtUEQRT>$*Jj69tvkJ_|Tqs>5@ z>37&b3`~)_0Y}ix52u2^3F-SF=dZVb z&Mqtgf=tcgwl=K191m&)rJwQKj;^I##)#DJ7LM6GnYnDITyGz$khK%1;CNI87W**t z^}7RYxaUAy%vHm-nZ1P^ztGsgxAw8k<~c)IjSv{1SizfmEC}nQr&$@})(-<;2z|9y z19&xaI9b6^K4cR7kW$<=9V6%bI+eC6423+%v~uY+RxB`1PKiE{-hVTk8ciZ^ImMCN z{mANCraEO_cKLgLa0Zvp?DrZht+v>kzZdb%V*1yMcr&f9P9;xpN+(^2iIjT0u=WK* zAHCv@;-;gF=!^-IQh0q}x%nKr>pk7O-G^w7?R%zu8&wcr`keu(XD{sM@$*iF<`CbZ z0DF3W*cEq!MMq}hy)Z&s0T#g6yuvt*21`DpQnS#!!A%DYC+!pdF`P73B9WIZ$OeX! zWyaLo^?FT2@FT~Djv%s}7Gbc&ElEZ|Ju0dFY zJe@jgt(~{)^|Z_N7KLt-)Ti04&!d=m-c&k7MV0nluEY-_ambg`nHHv45VRym#CRrN zc`NZ&*zGG21eSsNSH5NprsbM>qx;mRZ$z`c)-YtJbZ9RvI3%hPUc^da7wz*mu7U48Wg7WQ48#Q}lz zx~EEzV&njnfb5TQ`d3N-IWtWLj6RgT#L4K>`*rx!K&(_%E`Qm@@yFvIGIX^aZ;xV@ z<@i#@#B9e9P~lmkd^ybOBcZSoN}adEDuBgc?gpKX%z~?eL)}VQ=kUTvfar{64P~#5 zW99N9@S5QF4sh``k>5u9>a05{@VM+_%mlaI|Dqz`vx zQc>RoeP$(CLf0JWSDWQrsme2o?BoV#`w%3Z?Lx{m6Sov~O^}U8G&3l`WAWJMMvA8 zRyalA;mH%>l1(W4xF^)1;zsn%vovu_DXUdbH}T4XBx89-<3a9P#Okev$nwy>+ImC|}fB-@e=Q{(HD6t3-H|vh=9Y9bJD` zwZxvx7ynO{qY&uTPoy&9>JGh(=V1p+O8Qyd0R{_Y|NYwcdQt+`c1@3;njkd3;3f}o zjcbtLI7RBTynB>@sZ1>R%J~^Se|m!}B~yUK9+R?>Ma|CkB#!T`_FW*Q6WiI>@vx@L z0Z!VX4krf2F71q0xdif)v&m{lw=f`VKv_(B@}QePu6JG&@dE^0=uY?^8f$< literal 0 HcmV?d00001 diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/sample.json b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/sample.json new file mode 100755 index 0000000000..b10e6e185a --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/sample.json @@ -0,0 +1,51 @@ +{ + "guid": "B210B44F-FB86-4F42-BA4A-9980805350FF", + "name": "Overlapping Kernel Execution with Buffer Transfers and Host Processing through Double Buffering", + "categories": ["Toolkit/Intel® oneAPI Base Toolkit/FPGA/Tutorials"], + "description": "FPGA tutorial design to demonstrate overlapping kernel execution with 
buffer transfers and host-processing to improve system performance", + "toolchain": ["dpcpp"], + "os": ["linux", "windows"], + "targetDevice": ["FPGA"], + "builder": ["ide", "cmake"], + "languages": [{"cpp":{}}], + "ciTests": { + "linux": [ + { + "id": "fpga_emu", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make fpga_emu", + "./double_buffering.fpga_emu" + ] + }, + { + "id": "report", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make report" + ] + } + ], + "windows": [ + { + "id": "fpga_emu", + "steps": [ + "cd src", + "ninja fpga_emu", + "double_buffering.fpga_emu.exe" + ] + }, + { + "id": "report", + "steps": [ + "cd src", + "ninja report" + ] + } + ] + } +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/src/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/src/CMakeLists.txt new file mode 100755 index 0000000000..f918135042 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/src/CMakeLists.txt @@ -0,0 +1,89 @@ +set(SOURCE_FILE double_buffering.cpp) +set(TARGET_NAME double_buffering) + +set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu) +set(FPGA_TARGET ${TARGET_NAME}.fpga) + +# Intel supported FPGA Boards and their names +set(A10_PAC_BOARD_NAME "intel_a10gx_pac:pac_a10") +set(S10_PAC_BOARD_NAME "intel_s10sx_pac:pac_s10") + +# Assume target is the Intel(R) PAC with Intel Arria(R) 10 GX FPGA +SET(_FPGA_BOARD ${A10_PAC_BOARD_NAME}) + +# Check if target is the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA +IF (NOT DEFINED FPGA_BOARD) + MESSAGE(STATUS "\tFPGA_BOARD was not specified. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. 
Please refer to the README for more information on how to run the design on the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${A10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${S10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Stratix(R) 10 SX FPGA.") + SET(_FPGA_BOARD ${S10_PAC_BOARD_NAME}) + +ELSE() + MESSAGE(STATUS "\tAn invalid board name was passed in using the FPGA_BOARD flag. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. Please refer to the README for the list of valid board names.") +ENDIF() + +set(HARDWARE_COMPILE_FLAGS "-fintelfpga") + +# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation +set(HARDWARE_LINK_FLAGS "-fintelfpga -Xshardware -Xsboard=${_FPGA_BOARD} ${USER_HARDWARE_FLAGS}") + +set(EMULATOR_COMPILE_FLAGS "-fintelfpga -DFPGA_EMULATOR") +set(EMULATOR_LINK_FLAGS "-fintelfpga") + +# fpga emulator +if(WIN32) + set(WIN_EMULATOR_TARGET ${EMULATOR_TARGET}.exe) + add_custom_target(fpga_emu DEPENDS ${WIN_EMULATOR_TARGET}) + separate_arguments(WIN_EMULATOR_COMPILE_FLAGS WINDOWS_COMMAND "${EMULATOR_COMPILE_FLAGS}") + add_custom_command(OUTPUT ${WIN_EMULATOR_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} ${WIN_EMULATOR_COMPILE_FLAGS} /GX ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${WIN_EMULATOR_TARGET} + DEPENDS ${SOURCE_FILE}) +else() + add_executable(${EMULATOR_TARGET} ${SOURCE_FILE}) + add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS ${EMULATOR_COMPILE_FLAGS}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS ${EMULATOR_LINK_FLAGS}) +endif() + + +# fpga +if(WIN32) + add_custom_target(fpga + COMMAND 
echo "FPGA hardware flow is not supported in Windows") +else() + add_executable(${FPGA_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILE}) + add_custom_target(fpga DEPENDS ${FPGA_TARGET}) + set_target_properties(${FPGA_TARGET} PROPERTIES COMPILE_FLAGS ${HARDWARE_COMPILE_FLAGS}) + set_target_properties(${FPGA_TARGET} PROPERTIES LINK_FLAGS ${HARDWARE_LINK_FLAGS}) +endif() + +# generate report +if(WIN32) + set(DEVICE_OBJ_FILE ${TARGET_NAME}_report.a) + add_custom_target(report DEPENDS ${DEVICE_OBJ_FILE}) + + separate_arguments(HARDWARE_LINK_FLAGS_LIST WINDOWS_COMMAND "${HARDWARE_LINK_FLAGS}") + add_custom_command(OUTPUT ${DEVICE_OBJ_FILE} + COMMAND ${CMAKE_CXX_COMPILER} /EHsc ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${DEVICE_OBJ_FILE} + DEPENDS ${SOURCE_FILE}) + +else() + set(DEVICE_OBJ_FILE ${TARGET_NAME}_report.a) + add_custom_target(report DEPENDS ${DEVICE_OBJ_FILE}) + + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} ${SOURCE_FILE} COPYONLY) + + separate_arguments(HARDWARE_LINK_FLAGS_LIST UNIX_COMMAND "${HARDWARE_LINK_FLAGS}") + add_custom_command(OUTPUT ${DEVICE_OBJ_FILE} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link ${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${DEVICE_OBJ_FILE} + DEPENDS ${SOURCE_FILE}) +endif() + +# run +add_custom_target(run + COMMAND ../${TARGET_NAME}.fpga_emu + DEPENDS ${TARGET_NAME}.fpga_emu) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/src/build.ninja b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/src/build.ninja new file mode 100755 index 0000000000..3e8fdc6126 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/src/build.ninja @@ -0,0 +1,30 @@ +source_file = double_buffering.cpp +target_name = double_buffering + +emulator_target = ${target_name}.fpga_emu.exe +report_target = ${target_name}_report.a 
+report_target_s10_pac = ${target_name}_s10_pac_report.a + +hardware_flags = -fintelfpga -Xshardware +emulator_flags = -fintelfpga -DFPGA_EMULATOR + +rule build_fpga_emu + command = dpcpp /GX ${emulator_flags} $in -o $out + +rule gen_report + command = dpcpp /GX ${hardware_flags} -Xsboard=intel_a10gx_pac:pac_a10 -fsycl-link $in -o $out + +rule gen_report_s10_pac + command = dpcpp /GX ${hardware_flags} -Xsboard=intel_s10sx_pac:pac_s10 -fsycl-link $in -o $out + +# FPGA emulator +build fpga_emu: phony ${emulator_target} +build ${emulator_target}: build_fpga_emu ${source_file} + +# report +build report: phony ${report_target} +build ${report_target}: gen_report ${source_file} + +# report (S10 PAC) +build report_s10_pac: phony ${report_target_s10_pac} +build ${report_target_s10_pac}: gen_report_s10_pac ${source_file} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/src/double_buffering.cpp b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/src/double_buffering.cpp new file mode 100755 index 0000000000..556507e307 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/double_buffering/src/double_buffering.cpp @@ -0,0 +1,349 @@ +//============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +#include +#include +#include +#include + +#include "dpc_common.hpp" + +using namespace sycl; + +// kTimes = # times to execute the kernel. kTimes must be >= 2 +// kSize = # of floats to process on each kernel execution. +// run less in emulation to avoid high run time +#if defined(FPGA_EMULATOR) +constexpr int kTimes = 20; +constexpr int kSize = 4096; +#else +constexpr int kTimes = 100; +constexpr int kSize = 2621440; +#endif + +// Kernel executes a power function (base^kPow). Must be +// >= 2. 
Can increase this to increase kernel execution +// time, but ProcessOutput() time will also increase. +constexpr int kPow = 20; + +// Number of iterations through the main loop +constexpr int kNumRuns = 2; + +bool pass = true; + +class SimpleVpow; + +/* Kernel function. + Performs buffer_b[i] = buffer_a[i] ** pow + Only supports pow >= 2. + This kernel is not meant to be an optimal implementation of the power + operation -- it's just a sample kernel for this tutorial whose execution time + is easily controlled via the pow parameter. SYCL buffers are created + externally and passed in by reference to control (external to this function) + when the buffers are destructed. The destructor causes a blocking buffer + transfer from device to host and double buffering requires us to not block + here (because we need to launch another kernel). So we only want this + transfer to occur at the end of overall execution, not at the end of each + individual kernel execution. +*/ +void SimplePow(std::unique_ptr &q, buffer &buffer_a, + buffer &buffer_b, event &e) { + // Submit to the queue and execute the kernel + e = q->submit([&](handler &h) { + // Get kernel access to the buffers + auto accessor_a = buffer_a.get_access(h); + auto accessor_b = buffer_b.get_access(h); + + const int num = kSize; + assert(kPow >= 2); + const int p = kPow - 1; // Assumes pow >= 2; + + h.single_task([=]() [[intel::kernel_args_restrict]] { + for (int j = 0; j < p; j++) { + if (j == 0) { + for (int i = 0; i < num; i++) { + accessor_b[i] = accessor_a[i] * accessor_a[i]; + } + } else { + for (int i = 0; i < num; i++) { + accessor_b[i] = accessor_b[i] * accessor_a[i]; + } + } + } + }); + }); + + event update_host_event; + update_host_event = q->submit([&](handler &h) { + auto accessor_b = buffer_b.get_access(h); + + /* + Explicitly instruct the SYCL runtime to copy the kernel's output buffer + back to the host upon kernel completion. 
This is not required for + functionality since the buffer access in ProcessOutput() also implicitly + instructs the runtime to copy the data back. But it should be noted that + this buffer access blocks ProcessOutput() until the kernel is complete + and the data is copied. In contrast, update_host() instructs the runtime + to perform the copy earlier. This allows ProcessOutput() to optionally + perform more useful work *before* making the blocking buffer access. Said + another way, this allows ProcessOutput() to potentially perform more work + in parallel with the runtime's copy operation. + */ + h.update_host(accessor_b); + }); +} + +// Returns kernel execution time for a given SYCL event from a queue. +ulong SyclGetExecTimeNs(event e) { + ulong start_time = + e.get_profiling_info(); + ulong end_time = + e.get_profiling_info(); + return (end_time - start_time); +} + +// Local pow function for verifying results +float MyPow(float input, int pow) { + return (pow == 0) ? 1 : input * MyPow(input, pow - 1); +} + +/* Compares kernel output against expected output. Only compares part of the + output so that this method completes quickly. This is done + intentionally/artificially keep host-processing time shorter than kernel + execution time. Grabs kernel output data from its SYCL buffer. Reading from + this buffer is a blocking operation that will block on the kernel completing. + Queries and records execution time of the kernel that just completed. This + is a natural place to do this because ProcessOutput() is blocked on kernel + completion. 
+*/ +void ProcessOutput(buffer &input_buf, + buffer &output_buf, int exec_number, event e, + ulong &total_kernel_time_per_slot) { + auto input_buf_acc = input_buf.get_access(); + auto output_buf_acc = output_buf.get_access(); + int num_errors = 0; + int num_errors_to_print = 10; + /* The use of update_host() in the kernel function allows for additional + host-side operations to be performed here, in parallel with the buffer copy + operation from device to host, before the blocking access to the output + buffer is made via output_buf_acc[]. To be clear, no real operations are + done here and this is just a note that this is the place + where you *could* do it. */ + for (int i = 0; i < kSize / 8; i++) { + const bool mismatch = (MyPow(input_buf_acc[i], kPow) != output_buf_acc[i]); + if ((num_errors < num_errors_to_print) && mismatch) { + if (num_errors == 0) { + pass = false; + std::cout << "Verification failed on kernel execution # " << exec_number + << ". Showing up to " << num_errors_to_print + << " mismatches.\n"; + } + std::cout << "Verification failed on kernel execution # " << exec_number + << ", at element " << i << ". Expected " << std::fixed + << std::setprecision(16) << MyPow(input_buf_acc[i], kPow) + << " but got " << output_buf_acc[i] << "\n"; + num_errors++; + } + } + + // At this point we know the kernel has completed, + // so can query the profiling data. + total_kernel_time_per_slot += SyclGetExecTimeNs(e); +} + +/* + Generates input data for the next kernel execution. Only fills part of the + buffer so that this method completes quickly. This is done to + intentionally/artificially keep host-processing time shorter than kernel + execution time. Writes the data into the associated SYCL buffer. The write + will block until the previous kernel execution, that is using this buffer, + completes. 
+*/ +void ProcessInput(buffer &buf) { + // We are generating completely new input data, so can use discard_write() + // here to indicate we don't care about the SYCL buffer's current contents. + auto buf_acc = buf.get_access(); + + // RNG seed + auto seed = std::chrono::system_clock::now().time_since_epoch().count(); + + // RNG engine + std::default_random_engine dre(seed); + + // generate random numbers between 1 and 2 + std::uniform_real_distribution di(1.0f, 2.0f); + + // Randomly generate a start value and increment from there. + // Compared to randomly generating every value, this is done to + // speed up this function a bit. + float start_val = di(dre); + + for (int i = 0; i < kSize / 8; i++) { + buf_acc[i] = start_val; + start_val++; + } +} + +int main() { +// Create queue, get platform and device +#if defined(FPGA_EMULATOR) + intel::fpga_emulator_selector device_selector; + std::cout << "\nEmulator output does not demonstrate true hardware " + "performance. The design may need to run on actual hardware " + "to observe the performance benefit of the optimization " + "exemplified in this tutorial.\n\n"; +#else + intel::fpga_selector device_selector; +#endif + + try { + auto prop_list = + property_list{property::queue::enable_profiling()}; + + std::unique_ptr q; + q.reset(new queue(device_selector, dpc_common::exception_handler, prop_list)); + + platform platform = q->get_context().get_platform(); + device device = q->get_device(); + std::cout << "Platform name: " + << platform.get_info().c_str() << "\n"; + std::cout << "Device name: " + << device.get_info().c_str() << "\n\n\n"; + + std::cout << "Executing kernel " << kTimes << " times in each round.\n\n"; + + // Create a vector to store the input/output SYCL buffers + std::vector> input_buf; + std::vector> output_buf; + + // SYCL events for each kernel launch. + event sycl_events[2]; + + // In nanoseconds. Total execution time of kernels in a given slot. 
+ ulong total_kernel_time_per_slot[2]; + + // Total execution time of all kernels. + ulong total_kernel_time = 0; + + // Allocate vectors to store the host-side copies of the input data + // Create and allocate the SYCL buffers + for (int i = 0; i < 2; i++) { + input_buf.push_back(buffer(range<1>(kSize))); + output_buf.push_back(buffer(range<1>(kSize))); + } + + /* + Main loop. This loop runs twice to show the performance difference without + and with double buffering. + */ + for (int i = 0; i < kNumRuns; i++) { + for (int i = 0; i < 2; i++) { + total_kernel_time_per_slot[i] = 0; // Initialize timers to zero. + } + + switch (i) { + case 0: { + std::cout << "*** Beginning execution, without double buffering\n"; + break; + } + case 1: { + std::cout << "*** Beginning execution, with double buffering.\n"; + break; + } + default: { + std::cout << "*** Beginning execution.\n"; + } + } + + // Start the timer. This will include the time to process the input data + // for the first 2 kernel executions. + dpc_common::TimeInterval exec_time; + + if (i == 0) { // Single buffering + for (int i = 0; i < kTimes; i++) { + // Only print every few iterations, just to limit the prints. + if (i % 10 == 0) { + std::cout << "Launching kernel #" << i << "\n"; + } + + ProcessInput(input_buf[0]); + SimplePow(q, input_buf[0], output_buf[0], sycl_events[0]); + ProcessOutput(input_buf[0], output_buf[0], i, sycl_events[0], + total_kernel_time_per_slot[0]); + } + } else { // Double buffering + // Process input for first 2 kernel launches and queue them. Then block + // on processing the output of the first kernel. + ProcessInput(input_buf[0]); + ProcessInput(input_buf[1]); + + std::cout << "Launching kernel #0\n"; + + SimplePow(q, input_buf[0], output_buf[0], sycl_events[0]); + for (int i = 1; i < kTimes; i++) { + if (i % 10 == 0) { + std::cout << "Launching kernel #" << i << "\n"; + } // Only print every few iterations, just to limit the prints. 
+ // Launch the next kernel + SimplePow(q, input_buf[i % 2], output_buf[i % 2], sycl_events[i % 2]); + + // Process output from previous kernel (kernel # i - 1). This will block on + // kernel completion. + ProcessOutput(input_buf[(i - 1) % 2], output_buf[(i - 1) % 2], i - 1, + sycl_events[(i - 1) % 2], + total_kernel_time_per_slot[(i - 1) % 2]); + + // Generate input for the next kernel. + ProcessInput(input_buf[(i - 1) % 2]); + } + + // Process output of the final kernel (kernel # kTimes - 1) + ProcessOutput(input_buf[(kTimes - 1) % 2], output_buf[(kTimes - 1) % 2], + kTimes - 1, sycl_events[(kTimes - 1) % 2], + total_kernel_time_per_slot[(kTimes - 1) % 2]); + } + + // Add up the overall kernel execution time. + total_kernel_time = 0; + for (int i = 0; i < 2; i++) { + total_kernel_time += total_kernel_time_per_slot[i]; + } + + // Stop the timer. + double time_span = exec_time.Elapsed(); + + std::cout << "\nOverall execution time " + << ((i == 0) ? "without" : "with") << " double buffering = " + << (unsigned)(time_span * 1000) << " ms\n"; + std::cout << "Total kernel-only execution time " + << ((i == 0) ? "without" : "with") << " double buffering = " + << (unsigned)(total_kernel_time / 1000000) << " ms\n"; + std::cout << "Throughput = " << std::setprecision(8) + << (float)kSize * (float)kTimes * (float)sizeof(float) / + (float)time_span / 1000000 + << " MB/s\n\n\n"; + } + if (pass) { + std::cout << "Verification PASSED\n"; + } else { + std::cout << "Verification FAILED\n"; + return 1; + } + } catch (sycl::exception const& e) { + // Catches exceptions in the host code + std::cout << "Caught a SYCL host exception:\n" << e.what() << "\n"; + + // Most likely the runtime couldn't find FPGA hardware! 
+ if (e.get_cl_code() == CL_DEVICE_NOT_FOUND) { + std::cout << "If you are targeting an FPGA, please ensure that your " + "system has a correctly configured FPGA board.\n"; + std::cout << "If you are targeting the FPGA emulator, compile with " + "-DFPGA_EMULATOR.\n"; + } + std::terminate(); + } + return 0; +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/CMakeLists.txt new file mode 100755 index 0000000000..134e6d8534 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/CMakeLists.txt @@ -0,0 +1,12 @@ +set(CMAKE_CXX_COMPILER "dpcpp") + + +cmake_minimum_required (VERSION 2.8) + +project(NWayBuffering) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +add_subdirectory (src) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/License.txt b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/License.txt new file mode 100755 index 0000000000..e63c6e13dc --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/License.txt @@ -0,0 +1,7 @@ +Copyright Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/README.md b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/README.md new file mode 100755 index 0000000000..d4fb12ba40 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/README.md @@ -0,0 +1,297 @@ + +# N-Way Buffering to Overlap Kernel Execution with Buffer Transfers and Host Processing + +This FPGA tutorial demonstrates how to parallelize host-side processing and buffer transfers between host and device with kernel execution to improve overall application performance. It is a generalization of the 'double buffering' technique, and can be used to perform this overlap even when the host-processing time exceeds kernel execution time. + +***Documentation***: The [FPGA Optimization Guide](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide) provides comprehensive instructions for targeting FPGAs through DPC++. The [oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide) is a resource for general target-independent DPC++ programming. + +| Optimized for | Description +--- |--- +| OS | Linux* Ubuntu* 18.04; Windows* 10 +| Hardware | Intel® Programmable Acceleration Card (PAC) with Intel Arria® 10 GX FPGA;
Intel® Programmable Acceleration Card (PAC) with Intel Stratix® 10 SX FPGA +| Software | Intel® oneAPI DPC++ Compiler (Beta)
Intel® FPGA Add-On for oneAPI Base Toolkit +| What you will learn | How and when to apply the N-way buffering optimization technique +| Time to complete | 30 minutes + +_Notice: Limited support in Windows*; compiling for FPGA hardware is not supported in Windows*_ + +## Purpose +N-Way buffering is a generalization of the double buffering optimization technique (see the "Double Buffering" FPGA tutorial). This system-level optimization enables kernel execution to occur in parallel with host-side processing and buffer transfers between host and device, improving application performance. N-way buffering can achieve this overlap even when the host-processing time exceeds kernel execution time. + +### Background + +In an application where the FPGA kernel is executed multiple times, the host must perform the following processing and buffer transfers before each kernel invocation: +1. The output data from the *previous* invocation must be transferred from the device to host and then processed by the host. Examples of this processing include the following: + * Copying the data to another location + * Rearranging the data + * Verifying it in some way +2. The input data for the *next* invocation must be processed by the host and then transferred to the device. Examples of this processing include: + * Copying the data from another location + * Rearranging the data for kernel consumption + * Generating the data in some way + +Without the technique described in this tutorial, host processing and buffer transfers occur *between* kernel executions. Therefore, there is a gap in time between kernel executions, which you can refer to as kernel "downtime" (see diagram below). If these operations overlap with kernel execution, the kernels can execute back-to-back with minimal downtime, thereby increasing overall application performance. 
+ +### N-Way Buffering + +This technique is referred to as *N-Way Buffering*, but is frequently called *double buffering* in the most common case where N=2. + +Let's first define some variables: + +| Variable | Description | +| ------ | ------ | +| **R** | Time to transfer the kernel's output buffer from device to host. | +| **Op** | Host-side processing time of kernel output data (*output processing*). | +| **Ip** | Host-side processing time for kernel input data (*input processing*). | +| **W** | Time to transfer the kernel's input buffer from host to device. | +| **K** | Kernel execution time. | +| **N** | Number of buffer sets used. | +| **C** | Number of host-side CPU cores. | + + + +![](downtime.png) + +In general, the **R**, **Op**, **Ip**, and **W** operations must all complete before the next kernel is launched. To maximize performance, while one kernel is executing on the device, these operations should run in parallel and operate on a separate set of buffer locations. They should complete before the current kernel completes, thus allowing the next kernel to be launched immediately with no downtime. In general, to maximize performance, the host must launch a new kernel every **K**. + +If these host-side operations are executed serially, this leads to the following constraint: + +```c++ +R + Op + Ip + W <= K, to minimize kernel downtime. +``` + +In the above example, if the constraint is satisfied, the application requires two sets of buffers. In this case, **N**=2. + +However, the above constraint may not be satisfied in some applications (i.e., if host-processing takes longer than the kernel execution time). + +**NOTE**: A performance improvement may still be observed because kernel downtime may still be reduced (though perhaps not maximally reduced). + +In this case, to further improve performance, reduce the host-processing time through multi-threading. 
Rather than executing the above operations serially, perform the input- and output-processing operations in parallel using two threads, leading to the following constraint: + +```c++ +Max (R+Op, Ip+W) <= K +and +R + W <= K, to minimize kernel downtime. +``` + +If the above constraint is still unsatisfied, the technique can be extended beyond two sets of buffers to **N** sets of buffers to help improve the degree of overlap. In this case, the constraint becomes: + +```c++ +Max (R + Op, Ip + W) <= (N-1)*K +and +R + W <= K, to minimize kernel downtime. +``` + +The idea of N-way buffering is to prepare **N** sets of kernel input buffers, launch **N** kernels, and when the first kernel completes, begin the subsequent host-side operations. These operations may take a long time (longer than **K**), but they do not cause kernel downtime because an additional **N**-1 kernels have already been queued and can launch immediately. By the time these first **N** kernels complete, the aforementioned host-side operations would have also completed and the (**N**+1)th kernel can be launched with no downtime. As additional kernels complete, corresponding host-side operations are launched on the host, in a parallel fashion, using multiple threads. Although the host operations take longer than **K**, if **N** is chosen correctly, they will complete with a period of **K**, which is required to ensure we can launch a new kernel every **K**. To reiterate, this scheme requires multi-threaded host-operations because the host must perform processing for up to **N** kernels in parallel in order to keep up. + +The above formula can be used to calculate the **N** required to minimize downtime. However, there are some practical limits: +* **N** sets of buffers are required on both the host and device, therefore both must have the capacity for this many buffers. 
+* If the input and output processing operations are launched in separate threads, then (**N**-1)*2 cores are required, so **C** can become the limiting factor. + +### Measuring the Impact of N-Way Buffering + +You must get a sense of the kernel downtime to identify the degree to which this technique can help improve performance. + +This can be done by querying total kernel execution time from the runtime and comparing it with the overall application execution time. In an application where kernels execute with minimal downtime, these two numbers are close. However, if kernels have a lot of downtime, overall execution time notably exceeds the kernel execution time. The tutorial code exemplifies how to do this. + +### Tutorial Implementation Notes + +The example code runs with multiple iterations to illustrate how performance improves as **N** increases and as multi-threading is used. + +It is useful to think of the execution space as having **N** slots where the slots execute in chronological order, and each slot has its own set of buffers on the host and device. At the beginning of execution, the host prepares the kernel input data for the **N** slots and launches **N** kernels. When slot-0 completes, slot-1 begins executing immediately because it was already queued. The host begins both the output and input processing for slot-0. These two operations must complete before the host can queue another kernel into slot-0. The same is true for all slots. + +After each kernel is launched, the host-side operations (that occur *after* the kernel in that slot completes) are launched immediately from the `main()` program. They block until the kernel execution for that slot completes (this is enforced by the runtime). 
+ + +## Key Concepts +* The N-way buffering optimization technique as a generalization of double buffering +* Determining when N-way buffering is practical and beneficial +* How to measure the impact of N-way buffering + +## License +This code sample is licensed under MIT license. + + +## Building the `n_way_buffering` Tutorial + +### Include Files +The included header `dpc_common.hpp` is located at `%ONEAPI_ROOT%\dev-utilities\latest\include` on your development system. + +### Running Samples in DevCloud +If running a sample in the Intel DevCloud, remember that you must specify the compute node (fpga_compile or fpga_runtime) as well as whether to run in batch or interactive mode. For more information see the Intel® oneAPI Base Toolkit Get Started Guide ([https://devcloud.intel.com/oneapi/get-started/base-toolkit/](https://devcloud.intel.com/oneapi/get-started/base-toolkit/)). + +When compiling for FPGA hardware, it is recommended to increase the job timeout to 12h. + +### On a Linux* System + +1. Generate the `Makefile` by running `cmake`. + ``` + mkdir build + cd build + ``` + To compile for the Intel® PAC with Intel Arria® 10 GX FPGA, run `cmake` using the command: + ``` + cmake .. + ``` + Alternatively, to compile for the Intel® PAC with Intel Stratix® 10 SX FPGA, run `cmake` using the command: + + ``` + cmake .. -DFPGA_BOARD=intel_s10sx_pac:pac_s10 + ``` + +2. Compile the design through the generated `Makefile`. The following build targets are provided, matching the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + make fpga_emu + ``` + * Generate the optimization report: + ``` + make report + ``` + * Compile for FPGA hardware (longer compile time, targets FPGA device): + ``` + make fpga + ``` +3. (Optional) As the above hardware compile may take several hours to complete, an Intel® PAC with Intel Arria® 10 GX FPGA precompiled binary can be downloaded here. 
+ +### On a Windows* System +Note: `cmake` is not yet supported on Windows. A build.ninja file is provided instead. + +1. Enter the source file directory. + ``` + cd src + ``` + +2. Compile the design. The following build targets are provided, matching the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + ninja fpga_emu + ``` + + * Generate the optimization report: + + ``` + ninja report + ``` + If you are targeting Intel® PAC with Intel Stratix® 10 SX FPGA, instead use: + ``` + ninja report_s10_pac + ``` + * Compiling for FPGA hardware is not yet supported on Windows. + + ### In Third-Party Integrated Development Environments (IDEs) + +You can compile and run this tutorial in the Eclipse* IDE (in Linux*) and the Visual Studio* IDE (in Windows*). For instructions, refer to the following link: [Intel® oneAPI DPC++ FPGA Workflows on Third-Party IDEs](https://software.intel.com/en-us/articles/intel-oneapi-dpcpp-fpga-workflow-on-ide) + +## Examining the Reports +Locate `report.html` in the `n_way_buffering_report.prj/reports/` or `n_way_buffering_s10_pac_report.prj/reports/` directory. Open the report in any of Chrome*, Firefox*, Edge*, or Internet Explorer*. + +Note that because the optimization described in this tutorial takes place at the *runtime* level, the FPGA compiler report will not show a difference between the optimized and unoptimized cases. + + +## Running the Sample + + 1. Run the sample on the FPGA emulator (the kernel executes on the CPU): + ``` + ./n_way_buffering.fpga_emu (Linux) + n_way_buffering.fpga_emu.exe (Windows) + ``` +2. Run the sample on the FPGA device: + ``` + ./n_way_buffering.fpga (Linux) + ``` + +### Example of Output + +``` +Platform name: Intel(R) FPGA SDK for OpenCL(TM) +Device name: pac_a10 : Intel PAC Platform (pac_ec00000) + + +Executing kernel 100 times in each round. 
+ +*** Beginning execution, 1-way buffering, single-threaded host operations +Launching kernel #0 +Launching kernel #10 +Launching kernel #20 +Launching kernel #30 +Launching kernel #40 +Launching kernel #50 +Launching kernel #60 +Launching kernel #70 +Launching kernel #80 +Launching kernel #90 + +Overall execution time = 65915 ms +Total kernel-only execution time = 17852 ms +Throughput = 15.907802 MB/s + + +*** Beginning execution, 1-way buffering, multi-threaded host operations. +Launching kernel #0 +Launching kernel #10 +Launching kernel #20 +Launching kernel #30 +Launching kernel #40 +Launching kernel #50 +Launching kernel #60 +Launching kernel #70 +Launching kernel #80 +Launching kernel #90 + +Overall execution time = 51814 ms +Total kernel-only execution time = 17852 ms +Throughput = 20.237082 MB/s + + +*** Beginning execution, 2-way buffering, multi-threaded host operationss +Launching kernel #0 +Launching kernel #10 +Launching kernel #20 +Launching kernel #30 +Launching kernel #40 +Launching kernel #50 +Launching kernel #60 +Launching kernel #70 +Launching kernel #80 +Launching kernel #90 + +Overall execution time = 26109 ms +Total kernel-only execution time = 17852 ms +Throughput = 40.160442 MB/s + + +*** Beginning execution, N=5-way buffering, multi-threaded host operations +Launching kernel #0 +Launching kernel #10 +Launching kernel #20 +Launching kernel #30 +Launching kernel #40 +Launching kernel #50 +Launching kernel #60 +Launching kernel #70 +Launching kernel #80 +Launching kernel #90 + +Overall execution time with N-way buffering = 18763 ms +Total kernel-only execution time with N-way buffering = 17851 ms +Throughput = 55.884682 MB/s + + +Verification PASSED +``` + +### Discussion of Results + +A test compile of this tutorial design achieved an fMAX of approximately 340 MHz on the Intel® Programmable Acceleration Card with Intel® Arria® 10 GX FPGA. 
The results are shown in the following table: + +Configuration | Overall Execution Time (ms) | Total Kernel Execution time (ms) +-|-|- +1-way buffering, single-threaded | 64401 | 15187 +1-way buffering, multi-threaded | 53540 | 15187 +2-way buffering, multi-threaded | 27281 | 15187 +5-way buffering, multi-threaded | 16284 | 15188 + +In all runs, the total kernel execution time is similar, as expected. In the first three configurations, the overall execution time notably exceeds the total kernel execution time, implying there is downtime between kernel executions. However, as we switch from single-threaded to multi-threaded host operations and increase the number of buffer sets used, the overall execution time approaches the kernel execution time. diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/downtime.png b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/downtime.png new file mode 100755 index 0000000000000000000000000000000000000000..2a306929bc0a2ae8b326e1156ab8a0135cd9cc7c GIT binary patch literal 11112 zcmcI~1yodR_b)1<5+a}=0-_+@-Jmkm&_hV~fV8xdDuRq42oBAFFm#7>iG<_~2&hAc zbV*3V+%tgh``-KizW@KOb-&A6z&U4~v!C7j_w4<^G}IJG2yYSM;o*@eDS{w)c=(mT zb@Ao%z+cy1nhoIJIX8&HL%c#1?E-LvZ}UL)0UlmS1ks`SMd1F*6GeSDJiM#TxZiVM zoU^U)@Ys)(Ko7K^nyes9LyW#7=4c!kn$$RF(4zXnEhfV97B6YhfmVHrjC?c{E94Y3 z5iH5jhXZ->;ht^E={IS)*27-<{0z2T%e|uf%F;pVqrAmbJ9QXa!RYOenRn}N6AUA% zBm{qKDG&HxP7sl@3)o54noHiS-T9W{qpm~e5g_f$TVPn}bhJ27gq9j_(3#0G1Q^Br z7%Rd~FEs9h@lURvpNBAa=?UvKJ%-V1A@IpWHR@dR-VPjD27oLz zHKct(vt+#cPRA^3o&_eYLYM-}RltB?z zndmpyqRf-bC(+)W?es7sEJ~bSy%WTqN^Hs|D?}_FAHrZq04^T~r4x_4cWRIXsxha9 zbg}|kLcRM=UN6cgQhBw*N9E1K8k^H{>e^zI| z0F9jxlg(@~{BB+EKw)(cYiG{p@RaJV@;QF9%?uDMWc0|VdZ6S${XAr9#X3T4{iu>B zAu${+nd$Ty49y?ddkSHOT!HR_|0qkqDEEk&0lJ&16>N9You5ojyRDPE$t_qFc6_n< z&X0WK%RI3cAKkKF)3GSaXnCSXY5hoe#Q58{q8`d-ezRFxlJu70o*J<})eutpy+Czo z-~PrcOYlj^+_X}>A`^U)<1agf{1-cMTXH_BYjd|Btt2YHYVY>+4}sGzK*ePOYIhL9#P7QNO={w?Dr&;4>YmrqG0@2I<;A8 
z;;+4i$g3bCSfC8i8o@>I)LR4y6%d&aW6qE>10>k_FHeKEER##x%){p!%LbGc{Au~U zQ`>8m$sU3`tU}$tx-K%{-!b%b8GDQ(7npUm&m*a-u9QPMn34dNywm@0kkGBxHcdU?CRLy`y zzulw6;;qsf}8o-WvR0xTY}niwS~ z@AaU<^O?Wy>cKpR(`#Mfo9{Y!P$rRNankynt>4r-uSHiZ$D1x4yJ^g@o`tu7Q|=J~ z9M&$n2UDc&F}nBqPaJ*##8C@`&j+QfDl8Z!z0=kLg{~IN90}yUhf(<)B7%6|bQi|$ zUiTGIaiI;Eo1y^)A`V{t$c4vgnWkpaVsqDXOKWw)^`4m>Wr_Kv9<-r;+qX{FI8cTs zi!h6g75Ia@d9;I5GK?17^!41McO`bZJIYVx9ZkyC^P8?4qBvlm##t_pifN5|^@{F= zW-&7+Okm9MRm73YP$AI@xz&2ML&py{_jwtEJTAZAZK*3riPxOS&1}gS!>3RlkbH@m zg=9+M1A7UP>?RM~fq)c^g)8n6UzLFsIgUyZuX~9P4ex zYmpqMe)z2i7HcAj+y+cng)^O+g$pJ31NI7=E@V#5f`TdE!A~sh?5HXU0f-AVBw6G3 zga7dR=eksmryiv;zeK8Q82*a--kfb34Q2*QLcsUl{KNN7_(#IC$M@&cV#9+b3(VEB z)tG-%=W};xr-Rn+WQd6C;eq7DyJH@m2Rk)F^GIKh$atppUb8(G(Q@e8hJ@$ zKHdq}tMQQwrcXL48EJ_WU28=X5jM^2fjC3ZaHA49~*X>a;cj~br<;9L*h7odYp<>8c5*!Fiv}-&EW#DJ#BZD!$ixZ8FXsfo zF!tuEcfD_|nuH1&SY9dRh#L&p6xXmqB4u->#W1a1{zx^L4%HQoU(F_bpjKpJ*2Ejm z+XFrc9=aJIx}UFV4D%Y_v;ndxtG|bf-Wp7rcP8^cLP0B8wsml+=#q%UIF-tp*pO`< zKgG;7JLRB2F4PA7h$_Z(CvO$9av%CCw^ct;!RxANu+N>nCeBMoT&i~0{;nH>gZ>vf zK~^f34CF`gEGVOUf9!FR!?N_j#Qb3g%-wH(@Rk|*s%H{940hZDgSW0iz%a|Yn41Ge ziIH{tfp*RAE1lcn{Y$oXi}Gjax4J&t)S2r_k9EsFsgE=O?4?j|*wpb&LFym7~eZlz$}!v-PR#j;?iY zA>l+fTSe>^Cxxu(Hzm{Y zcVR*IPvFu^T=fVQKigM%>;i_OYO8NeWTk7$9#GFR-e>7Ctm|&`jjmuFK8gy~{F7u8 z$h1~OKB$1&K+Ft$`~NUR6;MpFY{A4E>P|P~>+mc&gI-OR>ieN`z_4Q!L@#Ac^mjO~ zkWET!Lcpuys3F39&72ofyII}DaE;2Wuf2h!%3 zQvq$vs=S!Px_hpIL_oN8S(rIrv7ej6jZ+b8#55(PH{+_6W)(arblEL4fGAnwK`qx< z4bWfeTt2$E^^6)LvpLvNOIL5OXcKs@4+xluRR@jWYNK~DHN$bnn9G&KStF0m@KCVw zsiCyuLt1{gMUH5mK4-VYA(gjU7HpS-&oQegNHJ=}8E(-T-^%F{i)Q1ilYf*!Zvcip zqDgN8WLq$VmGFZWK7-E{?KIU}ZmYrk)lh<7%OaesH%K8CG()~Siy^Vqek&B9w~Z|{s9qmEcJbD$>{B@GjXu~vmw&7| z(BB>5>)2CNtq#5}Oc}~*SeJk)uKLbLU+;jFB&(In0*O71{6hLzhWQ@rmYR^hNbxK&Nh!?OB^CbHQE&ZE5ega z|349)FQC5H@XG~vnZidO%!2v?B#!N=(7A?vtbs_>m-qaUVAkeGf0j#F*IW@HV`~;c z{6Sj$qMlchWx=n$y(3@u)E)W@RPvNv3s+e9=_O0?Sxd+munDekOgRD2?XEw^efJ9+v00=8ZF~pc_TP?UN*IIqJx6}qTHmED z=OLGoHhlc{1&je_MKcss{{%Qo2A3}w56IH{fe 
zL7q`JXF}+R8#*>V=*d7Q$bGlB=}Y4gextYi>6Z5~Gj2n5m+4`RQMKz#`*+e@X8ExB z^>~@FTIcZ=s&f|NU6Dnr)!0w63Ip4G2fHQH>e72w48ZpLWLr6+9>AU@>Hj+nKG_9& zHQlEII#p7-*l|5(ukm;-H-ZfLbY@OQ%y-_5kV62s`&CCH4mQ{5uwC@%yu`NIDV{=F zQFlH6qtnfFZhXjwfC;;lIyD0YnG@R)to;G_ReJTa&Dp*+|2cp`6Tg*k|FwqAQ6#xOy53)BMF3)IV1n3d%Bd$^6)92{$MMao zO`!<^=&OLLUXk#m0d(J0(X47`y*lRti+bZ$z%uiXfp#J;p31s;D>=n;r>{J$@_l@d z$%#w_EEsjeeCo7c6m@?_c4P{;e~G(ie@7uOH_LUtNGSc&m+t|}(kaxFWdPu??+VKXU z@UpJ=r>x>7U{wc?ZgCYR{E?|r-3xfn3y6Uh)*WQhjlF{-%BK3RE=6hlY%{-S-qz20 zjv2|N!l{oY4&uLNf| zimN)Y&If8J?#5#Cs2fG3b?mam2nKbpMvtz44$hU~;g>MWItU5f6^Tb(_Lk!(wXO=DiR<29UGY21GUYaN<-Ju+PJ{3t5I7usVJw9CB3(~ zLPLr4`8N7HA(T{I5%Z?X?h(-|%n?pbE7EUhn?~)F7b&k4>)ZvzNmnhggDFH<$_G?w z4v^wm%KJxdayW2{LgTr{Er{LBc0*ILs-zF7`na=v-j6*lf^@2Wx1KBX;I_J}vE3Kn z|Ff+I{$sSqIA)7pjp`50w~bpcxd_X)XQI+q@r$c@1p?D~I&AlX?=(^$Vw$nliR3;j zGEc$XnXwtD&ae3nruqpDPq+@Us)c-8tl1*1E}P*rovvD3wNQxO6}^TR_I{Pe_198> zSx#cphSiKeW)RI>_Iu2<-E00JTx+kX?z^DOfKYs?_0a{nCDIv{u@)st=0K z3kY%6ckJF^Iw&$`f0joJ@PudCbU+e4Zn{accwEZZ*t&0*j4kVN$T(t}?!jMMG@_Y6 z>rlJik4KcIlv*s4EzeLoDPX@eHry+(-#X zSQg;_$s+Yzy^?b`IkFlSaRF$sI=JGz+}6@nd%bd#7f?acEOtZ{R=-Uz$z+X*IDNZw!F8!-EZOD zr@{4p9~By5>X$gOQj+U8HdBnS6K@S0k8E1na0PJiyU9Dxp~kfxbmL*EBmWSwrE=Ef z)BGhtyyr3R$nV-($&~|5*Zd6Ln$nNnQzcmU?SLcoAz}q~MDTRq8t~RO@Ro6*Y2z`j zM=KY^xH0SOUmh^N*7*CI;YaFlZE=z>$c2wPV?O9)&^bJV``}mZVlGn$Ht56qh>K*5 z`&}7xUTZZG!p)kvh2d&GKMhHT-?QS~CjwaKeoabOw?9O*cs=B1U&Of2%If$Lr(-ZO zMy+|B72&xiWm>_aiT9jRg`266Q#l=1uW5vh z{oYeKEQMXZ;_-e>K%GvY*4TSJtmX=0xWey^aNc{hP*#L4izDSDY6c(ji+GQlo@goA zpa^cd?0*g(6~DaH)juks*27uU5pyN8<@Z}uP{u<1MXyhPwKdU?LGQ?=O0|p^4qs2* z31zQL+XwbHIf=cGf)yPfuplUbZfW_&w;%P5pzvj->v$h7%BnG$1ji@TT8kJ6Q`*#s z(CGo`(U5D5&ao~(Tz2e%N}PKOOq~WH#zPx}h|GWNg&@XMf`|{A%qIJz08UJz}ZoUHz^IzKkySKb^@pvL00X zXPwEJnjp+Eebkm+c0=-lg1#1{=*f}}P3BB~u|-IxP0ZF8x>1Ubvd8P7?)EK3*!DDh zTeoDb$r-xv{YDwDl$WE$LM0g{r_lXaw!}$uMpyfPZ=V|WDrL!O2P->1V8j`p@6`Ce zv|u|Z$QqHuA+hyVRGe2V!`??Fm|H74Xx}aB?%1e(qt};!eutE9-Dn>ul4{a7P=PbN zdSE?#{D4ei5mI0i5qPk6j=N&m*!jY#Pgu;Pfps-YW1`;XX9ojq&@-RlJM)R&Q=cHo 
zjLLofb7Qc3!$M^F#zIC2w~0GvYig60#oK!6^?ACwFHSR|uUp@feAbg&CE=Ef$@iT) zZ4GKRri%0u`5!5CM3m>eKecEV&JD?NMh%NQGpfazQ9qm-H9>_*Zfeor9}T?;gUBT> zj2Vo{AjPkeY*m9?JX@_QCWveg^=^D!sz*wX^({L^x5lO*O~#^H%0qVXemRs!Vd%`JN%$&Ge1f3d`a@=!_U_jw#AqC5a(F$}=B>)=%u7BY{|mfEQRev?Y;K z;fr8It&dLf09rHqb__U|pu!3MIWO`kJc;ns+I9=s(oyeY_P(`BcjvBw65cu?VwORd#~GLG`c`xaEn69Nf3_Lq6^X|X zu11o7FXBRRRk17?#*-_1(OZ4^${;ZqjS67>#+8ek%&frtPRQf@sZI6FpTtl6=|Ne} zJz_QPcy}I?i&WpHQ4sjF}Z5B85W%^GU(Z<9+0-#H<~6`oJWXSxg(vKbFe|^ zqa&EOp+WBIZC}&J+W{$FBGKn_CvN*?9&~)#S4L`Ku*+jkF|RPy#n%tju}jGzO||7u z#c!ZBuNq1sI^mC#k(fHlI`eYVaMj?`+^ScdO^zFdSswspVzbhc6k2uWJd==9=aKI| zbsk^#J~3VuRr7mG@wK0e)7R!1G>ofmi8AHp1^w0yW<|VF{;SI}*54uI#R+xIpgFw5 zq&w^kC;!`dT7di*pJ??W<8*R#>Zm1WYXx`xSxeJJ?b7XN&(4%@G=p9`sDBwJ3ol|J25NE9Bz=2Wizh6QT6`?Ej{aozINwk;7j0miW6=olXybwUYWpYo0ML zuXM&d>k0Gcv^r*A!mEUqYqQByo-M{jzrUp{e?TG!Ed-{{1A-5UYquwqx0&u7A05^ zq>TVJY$4~huB3NeFdg0;!`5e`0z$~@*ZT(1%XOiDL8akp%h#v$aTD%CpR&1REd2>o z;*i01JUVV}%<6eE@YPoIc1C}=@ATbMt>C829v;7cHC=iZK^cx>xqW~}OCS6LRV_@( z{dqHLcFPny2*Yk?Epmq3Y~92jd{Os3Z~^ci7Y_e<00`3uhcL7Jcef&o=y6c(kmR-{ z+lu>EqvS?=z134p37l`3Y7N~P+nlMD-i%jw92OR`SgqPbsjvND z!$({O^BP*=FP>(TtuiLC z(qoq(!TUgx4pKz?-=0{&hpK!eE-8%COWzEe|A^lea`}Qp6MsuVzbzgf7tntB+8%#E z+PK&LRF@Qro&T|(rH)7=wq$^YoPP2Y);iD$e8a&E39f?SR+j=+kByKy-aR>xV1Iet z{$RUte=!eku98ly$*yl~T>kcJ`1K)`pG@1TSz&sP%)F^$6l}61a0CI~wTrmJ1Uu?U zqGAEM2-PpkxyD(56HvZ#0Sz#yzR_zoNwXef2j0@hxm^BPm4Mu9r7Wo1sOn8H5Bw<|9vDpHpWshB<7RJCEo z_?~cV&7?>rW}mw`j~mbfZ(DNZMtRHIb>VJt(iJxVtUEQRT>$*Jj69tvkJ_|Tqs>5@ z>37&b3`~)_0Y}ix52u2^3F-SF=dZVb z&Mqtgf=tcgwl=K191m&)rJwQKj;^I##)#DJ7LM6GnYnDITyGz$khK%1;CNI87W**t z^}7RYxaUAy%vHm-nZ1P^ztGsgxAw8k<~c)IjSv{1SizfmEC}nQr&$@})(-<;2z|9y z19&xaI9b6^K4cR7kW$<=9V6%bI+eC6423+%v~uY+RxB`1PKiE{-hVTk8ciZ^ImMCN z{mANCraEO_cKLgLa0Zvp?DrZht+v>kzZdb%V*1yMcr&f9P9;xpN+(^2iIjT0u=WK* zAHCv@;-;gF=!^-IQh0q}x%nKr>pk7O-G^w7?R%zu8&wcr`keu(XD{sM@$*iF<`CbZ z0DF3W*cEq!MMq}hy)Z&s0T#g6yuvt*21`DpQnS#!!A%DYC+!pdF`P73B9WIZ$OeX! 
zWyaLo^?FT2@FT~Djv%s}7Gbc&ElEZ|Ju0dFY zJe@jgt(~{)^|Z_N7KLt-)Ti04&!d=m-c&k7MV0nluEY-_ambg`nHHv45VRym#CRrN zc`NZ&*zGG21eSsNSH5NprsbM>qx;mRZ$z`c)-YtJbZ9RvI3%hPUc^da7wz*mu7U48Wg7WQ48#Q}lz zx~EEzV&njnfb5TQ`d3N-IWtWLj6RgT#L4K>`*rx!K&(_%E`Qm@@yFvIGIX^aZ;xV@ z<@i#@#B9e9P~lmkd^ybOBcZSoN}adEDuBgc?gpKX%z~?eL)}VQ=kUTvfar{64P~#5 zW99N9@S5QF4sh``k>5u9>a05{@VM+_%mlaI|Dqz`vx zQc>RoeP$(CLf0JWSDWQrsme2o?BoV#`w%3Z?Lx{m6Sov~O^}U8G&3l`WAWJMMvA8 zRyalA;mH%>l1(W4xF^)1;zsn%vovu_DXUdbH}T4XBx89-<3a9P#Okev$nwy>+ImC|}fB-@e=Q{(HD6t3-H|vh=9Y9bJD` zwZxvx7ynO{qY&uTPoy&9>JGh(=V1p+O8Qyd0R{_Y|NYwcdQt+`c1@3;njkd3;3f}o zjcbtLI7RBTynB>@sZ1>R%J~^Se|m!}B~yUK9+R?>Ma|CkB#!T`_FW*Q6WiI>@vx@L z0Z!VX4krf2F71q0xdif)v&m{lw=f`VKv_(B@}QePu6JG&@dE^0=uY?^8f$< literal 0 HcmV?d00001 diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/n_way_buffering.sln b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/n_way_buffering.sln new file mode 100755 index 0000000000..5a77b3049a --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/n_way_buffering.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.28307.705 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "n_way_buffering", "n_way_buffering.vcxproj", "{49E7063B-56DA-4ACF-B153-5B56A98645BE}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {49E7063B-56DA-4ACF-B153-5B56A98645BE}.Debug|x64.ActiveCfg = Debug|x64 + {49E7063B-56DA-4ACF-B153-5B56A98645BE}.Debug|x64.Build.0 = Debug|x64 + {49E7063B-56DA-4ACF-B153-5B56A98645BE}.Release|x64.ActiveCfg = Release|x64 + {49E7063B-56DA-4ACF-B153-5B56A98645BE}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + 
HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {CC320E26-0D79-434A-8E69-3F09BFB2FCF4} + EndGlobalSection +EndGlobal diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/n_way_buffering.vcxproj b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/n_way_buffering.vcxproj new file mode 100755 index 0000000000..dff6f99529 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/n_way_buffering.vcxproj @@ -0,0 +1,160 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + + + + + + 15.0 + {49e7063b-56da-4acf-b153-5b56a98645be} + Win32Proj + n_way_buffering + $(WindowsSDKVersion.Replace("\","")) + + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + + + + + + + + + + + + + + + + + + + true + + + true + + + false + + + false + + + + Use + Level3 + Disabled + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + Disabled + true + true + pch.h + true + -DFPGA_EMULATOR %(AdditionalOptions) + $(IntDir)n_way_buffering.obj + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + true + -DFPGA_EMULATOR %(AdditionalOptions) + $(IntDir)n_way_buffering.obj + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + + \ No newline at end of file diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/sample.json 
b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/sample.json new file mode 100755 index 0000000000..dffbded768 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/sample.json @@ -0,0 +1,51 @@ +{ + "guid": "2100C9BD-331C-475B-9878-4D14AAF0981D", + "name": "Overlapping Kernel Execution with Buffer Transfers and Host-Processing through N-Way Buffering", + "categories": ["Toolkit/Intel® oneAPI Base Toolkit/FPGA/Tutorials"], + "description": "FPGA tutorial design to demonstrate overlapping kernel execution with buffer transfers and multi-threaded host-processing to improve system performance", + "toolchain": ["dpcpp"], + "os": ["linux", "windows"], + "targetDevice": ["FPGA"], + "builder": ["ide", "cmake"], + "languages": [{"cpp":{}}], + "ciTests": { + "linux": [ + { + "id": "fpga_emu", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make fpga_emu", + "./n_way_buffering.fpga_emu" + ] + }, + { + "id": "report", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make report" + ] + } + ], + "windows": [ + { + "id": "fpga_emu", + "steps": [ + "cd src", + "ninja fpga_emu", + "n_way_buffering.fpga_emu.exe" + ] + }, + { + "id": "report", + "steps": [ + "cd src", + "ninja report" + ] + } + ] + } +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/src/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/src/CMakeLists.txt new file mode 100755 index 0000000000..cf12b30f72 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/src/CMakeLists.txt @@ -0,0 +1,93 @@ +set(SOURCE_FILE n_way_buffering.cpp) +set(TARGET_NAME n_way_buffering) +set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu) +set(FPGA_TARGET ${TARGET_NAME}.fpga) + +set(EMULATOR_COMPILE_FLAGS "-fintelfpga -DFPGA_EMULATOR") +set(EMULATOR_LINK_FLAGS " -lpthread -fintelfpga") + +# Intel supported FPGA Boards and their names +set(A10_PAC_BOARD_NAME 
"intel_a10gx_pac:pac_a10") +set(S10_PAC_BOARD_NAME "intel_s10sx_pac:pac_s10") + +# Assume target is the Intel(R) PAC with Intel Arria(R) 10 GX FPGA +SET(_FPGA_BOARD ${A10_PAC_BOARD_NAME}) + +# Check if target is the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA +IF (NOT DEFINED FPGA_BOARD) + MESSAGE(STATUS "\tFPGA_BOARD was not specified. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. Please refer to the README for more information on how to run the design on the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${A10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${S10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Stratix(R) 10 SX FPGA.") + SET(_FPGA_BOARD ${S10_PAC_BOARD_NAME}) + +ELSE() + MESSAGE(STATUS "\tAn invalid board name was passed in using the FPGA_BOARD flag. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. 
Please refer to the README for the list of valid board names.") +ENDIF() + +# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation +set(HARDWARE_LINK_FLAGS "-fintelfpga -Xshardware -Xsboard=${_FPGA_BOARD} ${USER_HARDWARE_FLAGS}") + +# fpga emulator +if(WIN32) + set(WIN_EMULATOR_TARGET ${EMULATOR_TARGET}.exe) + add_custom_target(fpga_emu DEPENDS ${WIN_EMULATOR_TARGET}) + separate_arguments(WIN_EMULATOR_COMPILE_FLAGS WINDOWS_COMMAND "${EMULATOR_COMPILE_FLAGS}") + add_custom_command(OUTPUT ${WIN_EMULATOR_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} ${WIN_EMULATOR_COMPILE_FLAGS} /GX ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${WIN_EMULATOR_TARGET} + DEPENDS ${SOURCE_FILE}) +else() + add_executable(${EMULATOR_TARGET} ${SOURCE_FILE}) + add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS ${EMULATOR_COMPILE_FLAGS}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS ${EMULATOR_LINK_FLAGS}) +endif() + +# fpga +if(WIN32) + add_custom_target(fpga + COMMAND echo "FPGA hardware flow is not supported in Windows") +else() +set(FPGA_OBJ_FILE "dev_fpga.o") + add_custom_target(fpga DEPENDS ${FPGA_TARGET}) + + add_custom_command(OUTPUT ${FPGA_OBJ_FILE} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} -fintelfpga -c ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} -o ${FPGA_OBJ_FILE} + DEPENDS ${SOURCE_FILE}) + + separate_arguments(HARDWARE_LINK_FLAGS_LIST UNIX_COMMAND "${HARDWARE_LINK_FLAGS}") + add_custom_command(OUTPUT ${FPGA_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} ${FPGA_OBJ_FILE} -o ${CMAKE_BINARY_DIR}/${FPGA_TARGET} -lpthread + DEPENDS ${FPGA_OBJ_FILE}) +endif() + + +# report +if(WIN32) + set(DEVICE_OBJ_FILE ${TARGET_NAME}_report.a) + add_custom_target(report DEPENDS ${DEVICE_OBJ_FILE}) + + separate_arguments(HARDWARE_LINK_FLAGS_LIST WINDOWS_COMMAND "${HARDWARE_LINK_FLAGS}") + add_custom_command(OUTPUT 
${DEVICE_OBJ_FILE} + COMMAND ${CMAKE_CXX_COMPILER} /EHsc ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${DEVICE_OBJ_FILE} + DEPENDS ${SOURCE_FILE}) + +else() + set(DEVICE_OBJ_FILE ${TARGET_NAME}_report.a) + add_custom_target(report DEPENDS ${DEVICE_OBJ_FILE}) + + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} ${SOURCE_FILE} COPYONLY) + + separate_arguments(HARDWARE_LINK_FLAGS_LIST UNIX_COMMAND "${HARDWARE_LINK_FLAGS}") + add_custom_command(OUTPUT ${DEVICE_OBJ_FILE} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link ${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${DEVICE_OBJ_FILE} + DEPENDS ${SOURCE_FILE}) +endif() + +# run +add_custom_target(run + COMMAND ../${TARGET_NAME}.fpga_emu + DEPENDS ${TARGET_NAME}.fpga_emu) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/src/build.ninja b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/src/build.ninja new file mode 100755 index 0000000000..80284aff9b --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/src/build.ninja @@ -0,0 +1,30 @@ +source_file = n_way_buffering.cpp +target_name = n_way_buffering + +emulator_target = ${target_name}.fpga_emu.exe +report_target = ${target_name}_report.a +report_target_s10_pac = ${target_name}_s10_pac_report.a + +hardware_flags = -fintelfpga -Xshardware +emulator_flags = -fintelfpga -DFPGA_EMULATOR + +rule build_fpga_emu + command = dpcpp /GX ${emulator_flags} $in -o $out + +rule gen_report + command = dpcpp /GX ${hardware_flags} -Xsboard=intel_a10gx_pac:pac_a10 -fsycl-link $in -o $out + +rule gen_report_s10_pac + command = dpcpp /GX ${hardware_flags} -Xsboard=intel_s10sx_pac:pac_s10 -fsycl-link $in -o $out + +# FPGA emulator +build fpga_emu: phony ${emulator_target} +build ${emulator_target}: build_fpga_emu ${source_file} + +# report +build report: phony ${report_target} +build 
${report_target}: gen_report ${source_file} + +# report (S10 PAC) +build report_s10_pac: phony ${report_target_s10_pac} +build ${report_target_s10_pac}: gen_report_s10_pac ${source_file} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/src/n_way_buffering.cpp b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/src/n_way_buffering.cpp new file mode 100755 index 0000000000..c5428348db --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering/src/n_way_buffering.cpp @@ -0,0 +1,437 @@ +//============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +#include +#include +#include +#include +#include +#include "dpc_common.hpp" + +using namespace sycl; + +// N-way buffering. N must be >= 1. +constexpr int kLocalN = 5; + +// # times to execute the kernel. kTimes must be >= kLocalN +#if defined(FPGA_EMULATOR) +constexpr int kTimes = 20; +#else +constexpr int kTimes = 100; +#endif + +// # of floats to process on each kernel execution. +#if defined(FPGA_EMULATOR) +constexpr int kSize = 4096; +#else +constexpr int kSize = 2621440; // ~10MB +#endif + +// Kernel executes a power function (base^kPow). Must be +// >= 2. Can increase this to increase kernel execution +// time, but ProcessOutput() time will also increase. +constexpr int kPow = 20; + +// Number of iterations through the main loop +constexpr int kNumRuns = 4; + +bool pass = true; + +class SimpleVpow; + +/* Kernel function. + Performs buffer_b[i] = buffer_a[i] ** pow + Only supports pow >= 2. + This kernel is not meant to be an optimal implementation of the power + operation -- it's just a sample kernel for this tutorial whose execution time + is easily controlled via the pow parameter. 
SYCL buffers are created + externally and passed in by reference to control (external to this function) + when the buffers are destructed. The destructor causes a blocking buffer + transfer from device to host and N-way buffering requires us to not block + here (because we need to queue more kernels). So we only want this transfer + to occur at the end of overall execution, not at the end of each individual + kernel execution. +*/ +void SimplePow(std::unique_ptr &q, buffer &buffer_a, + buffer &buffer_b, event &e) { + // Submit to the queue and execute the kernel + e = q->submit([&](handler &h) { + // Get kernel access to the buffers + auto accessor_a = buffer_a.get_access(h); + auto accessor_b = buffer_b.get_access(h); + + const int num = kSize; + const int p = kPow - 1; // Assumes pow >= 2; + assert(kPow >= 2); + + h.single_task([=]() [[intel::kernel_args_restrict]] { + for (int j = 0; j < p; j++) { + if (j == 0) { + for (int i = 0; i < num; i++) { + accessor_b[i] = accessor_a[i] * accessor_a[i]; + } + } else { + for (int i = 0; i < num; i++) { + accessor_b[i] = accessor_b[i] * accessor_a[i]; + } + } + } + }); + }); + + event update_host_event; + update_host_event = q->submit([&](handler &h) { + auto accessor_b = buffer_b.get_access(h); + + /* + Explicitly instruct the SYCL runtime to copy the kernel's output buffer + back to the host upon kernel completion. This is not required for + functionality since the buffer access in ProcessOutput() also implicitly + instructs the runtime to copy the data back. But it should be noted that + this buffer access blocks ProcessOutput() until the kernel is complete + and the data is copied. In contrast, update_host() instructs the runtime + to perform the copy earlier. This allows ProcessOutput() to optionally + perform more useful work *before* making the blocking buffer access. Said + another way, this allows ProcessOutput() to potentially perform more work + in parallel with the runtime's copy operation. 
+ */ + h.update_host(accessor_b); + }); + +} + +// Returns kernel execution time for a given SYCL event from a queue. +ulong SyclGetExecTimeNs(event e) { + ulong start_time = + e.get_profiling_info(); + ulong end_time = + e.get_profiling_info(); + return (end_time - start_time); +} + +// Local pow function for verifying results +float MyPow(float input, int pow) { + return (pow == 0) ? 1 : input * MyPow(input, pow - 1); +} + +/* Compares kernel output against expected output. + Grabs kernel output data from its SYCL buffer. Reading from this buffer is a + blocking operation that will block on the kernel completing. Grabs expected + output from a host-side copy of the input data. A copy is used to allow for + parallel generation of the input data for the next execution. Queries and + records execution time of the kernel that just completed. This is a natural + place to do this because ProcessOutput() is blocked on kernel completion. +*/ +void ProcessOutput(buffer &output_buf, + std::vector &input_copy, int exec_number, event e, + ulong &total_kernel_time_per_slot) { + auto output_buf_acc = output_buf.get_access(); + int num_errors = 0; + int num_errors_to_print = 10; + + /* The use of update_host() in the kernel function allows for additional + host-side operations to be performed here, in parallel with the buffer copy + operation from device to host, before the blocking access to the output + buffer is made via output_buf_acc[]. To be clear, no real operations are + done here and this is just a note that this is the place + where you *could* do it. */ + for (int i = 0; i < kSize; i++) { + bool out_valid = (MyPow(input_copy.data()[i], kPow) != output_buf_acc[i]); + if ((num_errors < num_errors_to_print) && out_valid) { + if (num_errors == 0) { + pass = false; + std::cout << "Verification failed on kernel execution # " << exec_number + << ". 
Showing up to " << num_errors_to_print + << " mismatches.\n"; + } + std::cout << "Verification failed on kernel execution # " << exec_number + << ", at element " << i << ". Expected " << std::fixed + << std::setprecision(16) << MyPow(input_copy.data()[i], kPow) + << " but got " << output_buf_acc[i] << "\n"; + num_errors++; + } + } + + // At this point we know the kernel has completed, so can query the profiling + // data. + total_kernel_time_per_slot += SyclGetExecTimeNs(e); +} + +/* + Generates input data for the next kernel execution. + Writes the data into the associated SYCL buffer. The write will block until + the previous kernel execution, that is using this buffer, completes. Writes a + copy of the data into a host-side buffer that will later be used by + ProcessOutput(). +*/ +void ProcessInput(buffer &buf, std::vector ©) { + // We are generating completely new input data, so can use discard_write() + // here to indicate we don't care about the SYCL buffer's current contents. + auto buf_acc = buf.get_access(); + + // RNG seed + auto seed = std::chrono::system_clock::now().time_since_epoch().count(); + + // RNG engine + std::default_random_engine dre(seed); + + // Values between 1 and 2 + std::uniform_real_distribution di(1.0f, 2.0f); + + // Randomly generate a start value and increment from there. + // Compared to randomly generating every value, this is done to + // speed up this function a bit. + float start_val = di(dre); + + for (int i = 0; i < kSize; i++) { + buf_acc[i] = start_val; + copy.data()[i] = start_val; + start_val++; + } +} + +int main() { +// Create queue, get platform and device +#if defined(FPGA_EMULATOR) + intel::fpga_emulator_selector device_selector; + std::cout << "\nEmulator output does not demonstrate true hardware " + "performance. 
The design may need to run on actual hardware " + "to observe the performance benefit of the optimization " + "exemplified in this tutorial.\n\n"; +#else + intel::fpga_selector device_selector; +#endif + + try { + auto prop_list = + property_list{property::queue::enable_profiling()}; + + std::unique_ptr q; + q.reset(new queue(device_selector, dpc_common::exception_handler, prop_list)); + + platform platform = q->get_context().get_platform(); + device device = q->get_device(); + std::cout << "Platform name: " + << platform.get_info().c_str() << "\n"; + std::cout << "Device name: " + << device.get_info().c_str() << "\n\n\n"; + + std::cout << "Executing kernel " << kTimes << " times in each round.\n\n"; + + // Create a vector to store the input/output SYCL buffers + std::vector> input_buf; + std::vector> output_buf; + + // For every execution slot, we need 2 host-side buffers + // to store copies of the input data. One is used to + // verify the previous kernel's output. The other stores + // the new data for the next kernel execution. + std::vector input_buf_copy[2 * kLocalN]; + + // SYCL events for each kernel launch. + event sycl_events[kLocalN]; + + // In nanoseconds. Total execution time of kernels in a given slot. + ulong total_kernel_time_per_slot[kLocalN]; + + // Total execution time of all kernels. + ulong total_kernel_time = 0; + + // Threads to process the output from each kernel + std::thread t_process_output[kLocalN]; + + // Threads to process the input data for the next kernel + std::thread t_process_input[kLocalN]; + + // Demonstrate with 1-way buffering first, then N-way buffering. + int N; + + // st = "single threaded". + // Used to enable multi-threading in subsequent runs. 
+ bool st = true; + + // Allocate vectors to store the host-side copies of the input data + for (int i = 0; i < 2 * kLocalN; i++) { + input_buf_copy[i] = std::vector(kSize); + } + + // Create and allocate the SYCL buffers + for (int i = 0; i < kLocalN; i++) { + input_buf.push_back(buffer(range<1>(kSize))); + output_buf.push_back(buffer(range<1>(kSize))); + } + + /* + Main loop. + This loop runs multiple times to demonstrate how performance can be + improved by increasing the number of buffers as well as multi-threading + the host-side operations. The first iteration is a base run, demonstrating + the performance with none of these optimizations (i.e. 1-way buffering, + single-threaded). + */ + for (int i = 0; i < kNumRuns; i++) { + for (int slot = 0; slot < kLocalN; slot++) { + total_kernel_time_per_slot[slot] = 0; // Reset per-slot timers each run. + } + + switch (i) { + case 0: { + std::cout << "*** Beginning execution, 1-way buffering, " + "single-threaded host operations\n"; + N = 1; + st = true; + break; + } + case 1: { + std::cout << "*** Beginning execution, 1-way buffering, " + "multi-threaded host operations.\n"; + N = 1; + st = false; + break; + } + case 2: { + std::cout << "*** Beginning execution, 2-way buffering, " + "multi-threaded host operations\n"; + N = 2; + st = false; + break; + } + case 3: { + std::cout << "*** Beginning execution, N=" << kLocalN + << "-way buffering, multi-threaded host operations\n"; + N = kLocalN; + st = false; + break; + } + default: + std::cout << "*** Beginning execution.\n"; + } + + // Start the timer. This will include the time to process the + // input data for the first N kernel executions. + dpc_common::TimeInterval exec_time; + + // Process the input data for first N kernel executions. For + // multi-threaded runs, this is done in parallel. 
+ for (int i = 0; i < N; i++) { + t_process_input[i] = std::thread(ProcessInput, std::ref(input_buf[i]), + std::ref(input_buf_copy[i])); + if (st) { + t_process_input[i].join(); + } + } + + /* + It's useful to think of the kernel execution space as having N slots. + Conceptually, the slots are executed chronologically sequentially on the + device (i.e. slot 0 to N-1). Each slot has its own buffering on both the + host and device. Before launching a kernel in a given slot, we must + process output data from the previous execution that occurred in that + slot and process new input data for the upcoming new execution in that + slot. + */ + for (int i = 0; i < kTimes; i++) { + // The current slot is i%N. + // Before each kernel launch, the ProcessOutput() must have completed + // for the last execution in this slot. The ProcessInput() must also + // have completed for the upcoming new execution for this slot. Block on + // both of these. + if (!st) { + // ProcessOutput() is only relevant after the + // first N kernels have been launched. + if (i >= N) { + t_process_output[i % N].join(); + } + + t_process_input[i % N].join(); + } + + // Launch the kernel. This is non-blocking with respect to main(). + // Only print every few iterations, just to limit the prints. + if (i % 10 == 0) { + std::cout << "Launching kernel #" << i << "\n"; + } + + SimplePow(q, input_buf[i % N], output_buf[i % N], sycl_events[i % N]); + + // Immediately launch threads for the ProcessOutput() and + // ProcessInput() for *this* slot. These are non-blocking with respect + // to main(), but they will individually be blocked until the + // corresponding kernel execution is complete. The ProcessOutput() + // compares the kernel output data against the input data. But + // ProcessInput() will be overwriting that input data in parallel. + // Therefore ProcessOutput() must compare against an older copy of the + // data. We ping-pong between host-side copies of the input data. 
+ t_process_output[i % N] = std::thread( + ProcessOutput, std::ref(output_buf[i % N]), + std::ref(input_buf_copy[i % (2 * N)]), i, sycl_events[i % N], + std::ref(total_kernel_time_per_slot[i % N])); + + // For single-threaded runs, force single-threaded operation by + // blocking here immediately. + if (st) { + t_process_output[i % N].join(); + } + + // For the final N kernel launches, no need to process + // input data because there will be no more launches. + if (i < kTimes - N) { + // The indexes for the input_buf_copy used by ProcessOutput() and + // ProcessInput() are spaced N apart. + t_process_input[i % N] = + std::thread(ProcessInput, std::ref(input_buf[i % N]), + std::ref(input_buf_copy[(i + N) % (2 * N)])); + + if (st) { + t_process_input[i % N].join(); + } + } + } + + // Wait for the final N threads to finish and add up the overall kernel + // execution time. + total_kernel_time = 0; + for (int i = 0; i < N; i++) { + if (!st) { + t_process_output[i].join(); + } + total_kernel_time += total_kernel_time_per_slot[i]; + } + + // Stop the timer. + double time_span = exec_time.Elapsed(); + + std::cout << "\nOverall execution time " + << ((i == kNumRuns - 1) ? ("with N-way buffering ") : "") + << "= " << (unsigned)(time_span * 1000) << " ms\n"; + std::cout << "Total kernel-only execution time " + << ((i == kNumRuns - 1) ? ("with N-way buffering ") : "") + << "= " << (unsigned)(total_kernel_time / 1000000) << " ms\n"; + std::cout << "Throughput = " << std::setprecision(8) + << (float)kSize * (float)kTimes * (float)sizeof(float) / + (float)time_span / 1000000 + << " MB/s\n\n\n"; + } + if (pass) { + std::cout << "Verification PASSED\n"; + } else { + std::cout << "Verification FAILED\n"; + return 1; + } + } catch (sycl::exception const& e) { + // Catches exceptions in the host code + std::cout << "Caught a SYCL host exception:\n" << e.what() << "\n"; + + // Most likely the runtime couldn't find FPGA hardware! 
+ if (e.get_cl_code() == CL_DEVICE_NOT_FOUND) { + std::cout << "If you are targeting an FPGA, please ensure that your " + "system has a correctly configured FPGA board.\n"; + std::cout << "If you are targeting the FPGA emulator, compile with " + "-DFPGA_EMULATOR.\n"; + } + std::terminate(); + } + return 0; +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/CMakeLists.txt new file mode 100755 index 0000000000..4835f73b5f --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/CMakeLists.txt @@ -0,0 +1,11 @@ +set(CMAKE_CXX_COMPILER "dpcpp") + +cmake_minimum_required (VERSION 2.8) + +project(LocalMemoryCache) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +add_subdirectory (src) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/License.txt b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/License.txt new file mode 100755 index 0000000000..e63c6e13dc --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/License.txt @@ -0,0 +1,7 @@ +Copyright Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/README.md b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/README.md new file mode 100755 index 0000000000..8a974787e4 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/README.md @@ -0,0 +1,189 @@ +# Caching On-Chip Memory to Improve Loop Performance +This FPGA tutorial demonstrates how to build a simple cache (implemented in FPGA registers) to store recently-accessed memory locations so that the compiler can achieve II=1 on critical loops in task kernels. + + +***Documentation***: The [oneAPI DPC++ FPGA Optimization Guide](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide) provides comprehensive instructions for targeting FPGAs through DPC++. The [oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide) is a general resource for target-independent DPC++ programming. + +| Optimized for | Description +--- |--- +| OS | Linux* Ubuntu* 18.04; Windows* 10 +| Hardware | Intel® Programmable Acceleration Card (PAC) with Intel Arria® 10 GX FPGA;
Intel® Programmable Acceleration Card (PAC) with Intel Stratix® 10 SX FPGA +| Software | Intel® oneAPI DPC++ Compiler (Beta)
Intel® FPGA Add-On for oneAPI Base Toolkit +| What you will learn | How and when to implement the on-chip memory cache optimization +| Time to complete | 30 minutes + +_Notice: Limited support in Windows*; compiling for FPGA hardware is not supported in Windows*_ + +## Purpose +In DPC++ task kernels for FPGA, it is always our objective to achieve an initiation interval (II) of 1 on performance-critical loops. This means that a new loop iteration is launched on every clock cycle, maximizing the throughput of the loop. + +When the loop contains a loop-carried variable that is implemented in on-chip memory, the compiler often *cannot* achieve II=1 because the memory access takes more than one clock cycle. If the updated memory location may be needed on the next loop iteration, the next iteration must be delayed to allow time for the update, hence II > 1. + +The on-chip memory cache technique breaks this dependency by storing recently-accessed values in a cache capable of a 1-cycle read-modify-write operation. The cache is implemented in FPGA registers rather than on-chip memory. By pulling memory accesses preferentially from the register cache, the loop-carried dependency is broken. + +### When is the on-chip memory cache technique applicable? + +***Failure to achieve II=1 because of a loop-carried memory dependency in on-chip memory***: +The on-chip memory cache technique is applicable if the compiler could not pipeline a loop with II=1 because of an on-chip memory dependency. (If the compiler could not achieve II=1 because of a *global* memory dependency, this technique does not apply as the access latencies are too great.) + +To check this for a given design, view the "Loops Analysis" section of its optimization report. The report lists the II of all loops and explains why a lower II is not achievable. Check whether the reason given resembles "the compiler failed to schedule this loop with smaller II due to memory dependency". 
The report will describe the "most critical loop feedback path during scheduling". Check whether this includes on-chip memory load/store operations on the critical path. + +***An II=1 loop with a load operation of latency 1***: +The compiler is capable of reducing the latency of on-chip memory accesses in order to achieve II=1. However, in doing so the compiler makes a trade-off, sacrificing fMAX to better optimize the loop. + +In a design with II=1 critical loops but lower than desired fMAX, the on-chip memory cache technique may still be applicable. It can help recover fMAX by enabling the compiler to achieve II=1 with a higher latency memory access. + +To check whether this is the case for a given design, view the "Kernel Memory Viewer" section of the optimization report. Select the on-chip memory of interest from the Kernel Memory List, and mouse over the load operation "LD" to check its latency. If the latency of the load operation is 1, this is a clear sign that the compiler has attempted to sacrifice fMAX to better optimize a loop. + + +### Implementing the on-chip memory cache technique + +The tutorial demonstrates the technique using a program that computes a histogram. The histogram operation accepts an input vector of values, separates the values into buckets, and counts the number of values per bucket. For each input value, an output bucket location is determined, and the count for the bucket is incremented. This count is stored in the on-chip memory and the increment operation requires reading from the memory, performing the increment, and storing the result. This read-modify-write operation is the critical path that can result in II > 1. + +To reduce II, the idea is to store recently-accessed values in an FPGA register-implemented cache that is capable of a 1-cycle read-modify-write operation. If the memory location required on a given iteration exists in the cache, it is pulled from there. 
The updated count is written back to *both* the cache and the on-chip memory. The `ivdep` pragma is added to inform the compiler that if a loop-carried variable (namely, the variable storing the histogram output) is needed within `CACHE_DEPTH` iterations, it is guaranteed to be available right away. + +### Selecting the cache depth + +While any value of `CACHE_DEPTH` results in functional hardware, the ideal value of `CACHE_DEPTH` requires some experimentation. The depth of the cache needs to roughly cover the latency of the on-chip memory access. To determine the correct value, it is suggested to start with a value of 2 and then increase it until both II = 1 and load latency > 1. In this tutorial, a `CACHE_DEPTH` of 5 is needed. + +Each iteration takes only a few moments by running `make report` (refer to the section below on how to build the design). It is important to find the *minimal* value of `CACHE_DEPTH` that results in a maximal performance increase. Unnecessarily large values of `CACHE_DEPTH` consume unnecessary FPGA resources and can reduce fMAX. Therefore, at a `CACHE_DEPTH` that results in II=1 and load latency = 1, if further increases to `CACHE_DEPTH` show no improvement, then `CACHE_DEPTH` should not be increased any further. + +In the tutorial, two versions of the histogram kernel are implemented: one with and one without caching. The report shows II > 1 for the loop in the kernel without caching and II = 1 for the one with caching. + +## Key Concepts +* How to implement the on-chip memory cache optimization technique +* The scenarios in which this technique benefits performance +* How to tune the cache depth + +## License +This code sample is licensed under MIT license. + + +## Building the `onchip_memory_cache` Tutorial + +### Include Files +The included header `dpc_common.hpp` is located at `%ONEAPI_ROOT%\dev-utilities\latest\include` on your development system. 
+ +### Running Samples in DevCloud +If running a sample in the Intel DevCloud, remember that you must specify the compute node (fpga_compile or fpga_runtime) as well as whether to run in batch or interactive mode. For more information see the Intel® oneAPI Base Toolkit Get Started Guide ([https://devcloud.intel.com/oneapi/get-started/base-toolkit/](https://devcloud.intel.com/oneapi/get-started/base-toolkit/)). + +When compiling for FPGA hardware, it is recommended to increase the job timeout to 12h. + +### On a Linux* System + +1. Generate the `Makefile` by running `cmake`. + ``` + mkdir build + cd build + ``` + To compile for the Intel® PAC with Intel Arria® 10 GX FPGA, run `cmake` using the command: + ``` + cmake .. + ``` + Alternatively, to compile for the Intel® PAC with Intel Stratix® 10 SX FPGA, run `cmake` using the command: + + ``` + cmake .. -DFPGA_BOARD=intel_s10sx_pac:pac_s10 + ``` + +2. Compile the design through the generated `Makefile`. The following build targets are provided, matching the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + make fpga_emu + ``` + * Generate the optimization report: + ``` + make report + ``` + * Compile for FPGA hardware (longer compile time, targets FPGA device): + ``` + make fpga + ``` +3. (Optional) As the above hardware compile may take several hours to complete, an Intel® PAC with Intel Arria® 10 GX FPGA precompiled binary can be downloaded here. + +### On a Windows* System +Note: `cmake` is not yet supported on Windows. A build.ninja file is provided instead. + +1. Enter the source file directory. + ``` + cd src + ``` + +2. Compile the design. 
The following build targets are provided, matching the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + ninja fpga_emu + ``` + + * Generate the optimization report: + + ``` + ninja report + ``` + If you are targeting Intel® PAC with Intel Stratix® 10 SX FPGA, instead use: + ``` + ninja report_s10_pac + ``` + * Compiling for FPGA hardware is not yet supported on Windows. + + ### In Third-Party Integrated Development Environments (IDEs) + +You can compile and run this tutorial in the Eclipse* IDE (in Linux*) and the Visual Studio* IDE (in Windows*). For instructions, refer to the following link: [Intel® oneAPI DPC++ FPGA Workflows on Third-Party IDEs](https://software.intel.com/en-us/articles/intel-oneapi-dpcpp-fpga-workflow-on-ide) + + +## Examining the Reports +Locate `report.html` in the `onchip_memory_cache_report.prj/reports/` or `onchip_memory_cache_s10_pac_report.prj/reports/` directory. Open the report in any of Chrome*, Firefox*, Edge*, or Internet Explorer*. + +Compare the "Loops Analysis" reports with and without the on-chip memory cache optimization, as described in the "When is the on-chip memory cache technique applicable?" section. + + +## Running the Sample + + 1. Run the sample on the FPGA emulator (the kernel executes on the CPU): + ``` + ./onchip_memory_cache.fpga_emu (Linux) + onchip_memory_cache.fpga_emu.exe (Windows) + ``` +2. Run the sample on the FPGA device: + ``` + ./onchip_memory_cache.fpga (Linux) + ``` + +### Example of Output + +``` +Platform name: Intel(R) FPGA SDK for OpenCL(TM) +Device name: pac_a10 : Intel PAC Platform (pac_ee00000) + + +Number of inputs: 16777216 +Number of outputs: 64 + +Beginning run without local memory caching. + +Verification PASSED + +Kernel execution time: 0.114106 seconds +Kernel throughput without caching: 560.884047 MB/s + +Beginning run with local memory caching. 
+ +Verification PASSED + +Kernel execution time: 0.059061 seconds +Kernel throughput with caching: 1083.623184 MB/s +``` + +### Discussion of Results + +A test compile of this tutorial design achieved an fMAX of approximately 250 MHz on the Intel® Programmable Acceleration Card with Intel® Arria® 10 GX FPGA. The results are shown in the following table: + +Configuration | Execution Time (s) | Throughput (MB/s) +-|-|- +Without caching | 0.153 | 418 +With caching | 0.08 | 809 + +When caching is used, performance notably increases. As previously mentioned, this technique should result in an II reduction, which should lead to a throughput improvement. The technique can also improve fMAX if the compiler had previously implemented a latency=1 load operation, in which case the fMAX increase should result in a further throughput improvement. + diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/onchip_memory_cache.sln b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/onchip_memory_cache.sln new file mode 100755 index 0000000000..3df819f016 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/onchip_memory_cache.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.28307.705 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "onchip_memory_cache", "onchip_memory_cache.vcxproj", "{66A01391-21D2-46BB-A37A-6B8670BEE1FC}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {66A01391-21D2-46BB-A37A-6B8670BEE1FC}.Debug|x64.ActiveCfg = Debug|x64 + {66A01391-21D2-46BB-A37A-6B8670BEE1FC}.Debug|x64.Build.0 = Debug|x64 + {66A01391-21D2-46BB-A37A-6B8670BEE1FC}.Release|x64.ActiveCfg = Release|x64 + 
{66A01391-21D2-46BB-A37A-6B8670BEE1FC}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {E3206292-E99D-4ADC-B428-E0557E8070D4} + EndGlobalSection +EndGlobal diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/onchip_memory_cache.vcxproj b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/onchip_memory_cache.vcxproj new file mode 100755 index 0000000000..940683894e --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/onchip_memory_cache.vcxproj @@ -0,0 +1,160 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + + + + + + 15.0 + {66a01391-21d2-46bb-a37a-6b8670bee1fc} + Win32Proj + onchip_memory_cache + $(WindowsSDKVersion.Replace("\","")) + + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + + + + + + + + + + + + + + + + + + + true + + + true + + + false + + + false + + + + Use + Level3 + Disabled + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + Disabled + true + true + pch.h + true + -DFPGA_EMULATOR %(AdditionalOptions) + $(IntDir)onchip_memory_cache.obj + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + true + -DFPGA_EMULATOR %(AdditionalOptions) + $(IntDir)onchip_memory_cache.obj + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + 
+ \ No newline at end of file diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/sample.json b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/sample.json new file mode 100755 index 0000000000..a35ba679ac --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/sample.json @@ -0,0 +1,51 @@ +{ + "guid": "93DA332C-5490-4E4B-8038-BDEC1662A2D0", + "name": "Caching On-Chip Memory to Improve Loop Performance", + "categories": ["Toolkit/Intel® oneAPI Base Toolkit/FPGA/Tutorials"], + "description": "FPGA tutorial demonstrating the caching of on-chip memory to reduce loop initiation interval.", + "toolchain": ["dpcpp"], + "os": ["linux", "windows"], + "builder": ["ide", "cmake"], + "targetDevice": ["FPGA"], + "languages": [{"cpp":{}}], + "ciTests": { + "linux": [ + { + "id": "fpga_emu", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make fpga_emu", + "./onchip_memory_cache.fpga_emu" + ] + }, + { + "id": "report", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make report" + ] + } + ], + "windows": [ + { + "id": "fpga_emu", + "steps": [ + "cd src", + "ninja fpga_emu", + "onchip_memory_cache.fpga_emu.exe" + ] + }, + { + "id": "report", + "steps": [ + "cd src", + "ninja report" + ] + } + ] + } +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/src/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/src/CMakeLists.txt new file mode 100755 index 0000000000..9ed3cee584 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/src/CMakeLists.txt @@ -0,0 +1,89 @@ +set(SOURCE_FILE onchip_memory_cache.cpp) +set(TARGET_NAME onchip_memory_cache) + +set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu) +set(FPGA_TARGET ${TARGET_NAME}.fpga) + +# Intel supported FPGA Boards and their names +set(A10_PAC_BOARD_NAME "intel_a10gx_pac:pac_a10") +set(S10_PAC_BOARD_NAME 
"intel_s10sx_pac:pac_s10") + +# Assume target is the Intel(R) PAC with Intel Arria(R) 10 GX FPGA +SET(_FPGA_BOARD ${A10_PAC_BOARD_NAME}) + +# Check if target is the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA +IF (NOT DEFINED FPGA_BOARD) + MESSAGE(STATUS "\tFPGA_BOARD was not specified. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. Please refer to the README for more information on how to run the design on the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${A10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${S10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Stratix(R) 10 SX FPGA.") + SET(_FPGA_BOARD ${S10_PAC_BOARD_NAME}) + +ELSE() + MESSAGE(STATUS "\tAn invalid board name was passed in using the FPGA_BOARD flag. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. 
Please refer to the README for the list of valid board names.") +ENDIF() + +set(HARDWARE_COMPILE_FLAGS "-fintelfpga") + +# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation +set(HARDWARE_LINK_FLAGS "-fintelfpga -Xshardware -Xsboard=${_FPGA_BOARD} ${USER_HARDWARE_FLAGS}") + +set(EMULATOR_COMPILE_FLAGS "-fintelfpga -DFPGA_EMULATOR") +set(EMULATOR_LINK_FLAGS "-fintelfpga") + +# fpga emulator +if(WIN32) + set(WIN_EMULATOR_TARGET ${EMULATOR_TARGET}.exe) + add_custom_target(fpga_emu DEPENDS ${WIN_EMULATOR_TARGET}) + separate_arguments(WIN_EMULATOR_COMPILE_FLAGS WINDOWS_COMMAND "${EMULATOR_COMPILE_FLAGS}") + add_custom_command(OUTPUT ${WIN_EMULATOR_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} ${WIN_EMULATOR_COMPILE_FLAGS} /GX ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${WIN_EMULATOR_TARGET} + DEPENDS ${SOURCE_FILE}) +else() + add_executable(${EMULATOR_TARGET} ${SOURCE_FILE}) + add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS ${EMULATOR_COMPILE_FLAGS}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS ${EMULATOR_LINK_FLAGS}) +endif() + + +# fpga +if(WIN32) + add_custom_target(fpga + COMMAND echo "FPGA hardware flow is not supported in Windows") +else() + add_executable(${FPGA_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILE}) + add_custom_target(fpga DEPENDS ${FPGA_TARGET}) + set_target_properties(${FPGA_TARGET} PROPERTIES COMPILE_FLAGS ${HARDWARE_COMPILE_FLAGS}) + set_target_properties(${FPGA_TARGET} PROPERTIES LINK_FLAGS ${HARDWARE_LINK_FLAGS}) +endif() + +# generate report +if(WIN32) + set(DEVICE_OBJ_FILE ${TARGET_NAME}_report.a) + add_custom_target(report DEPENDS ${DEVICE_OBJ_FILE}) + + separate_arguments(HARDWARE_LINK_FLAGS_LIST WINDOWS_COMMAND "${HARDWARE_LINK_FLAGS}") + add_custom_command(OUTPUT ${DEVICE_OBJ_FILE} + COMMAND ${CMAKE_CXX_COMPILER} /EHsc ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link 
${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${DEVICE_OBJ_FILE} + DEPENDS ${SOURCE_FILE}) + +else() + set(DEVICE_OBJ_FILE ${TARGET_NAME}_report.a) + add_custom_target(report DEPENDS ${DEVICE_OBJ_FILE}) + + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} ${SOURCE_FILE} COPYONLY) + + separate_arguments(HARDWARE_LINK_FLAGS_LIST UNIX_COMMAND "${HARDWARE_LINK_FLAGS}") + add_custom_command(OUTPUT ${DEVICE_OBJ_FILE} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link ${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${DEVICE_OBJ_FILE} + DEPENDS ${SOURCE_FILE}) +endif() + +# run +add_custom_target(run + COMMAND ../${TARGET_NAME}.fpga_emu + DEPENDS ${TARGET_NAME}.fpga_emu) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/src/build.ninja b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/src/build.ninja new file mode 100755 index 0000000000..94d90e092c --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/src/build.ninja @@ -0,0 +1,30 @@ +source_file = onchip_memory_cache.cpp +target_name = onchip_memory_cache + +emulator_target = ${target_name}.fpga_emu.exe +report_target = ${target_name}_report.a +report_target_s10_pac = ${target_name}_s10_pac_report.a + +hardware_flags = -fintelfpga -Xshardware +emulator_flags = -fintelfpga -DFPGA_EMULATOR + +rule build_fpga_emu + command = dpcpp /GX ${emulator_flags} $in -o $out + +rule gen_report + command = dpcpp /GX ${hardware_flags} -Xsboard=intel_a10gx_pac:pac_a10 -fsycl-link $in -o $out + +rule gen_report_s10_pac + command = dpcpp /GX ${hardware_flags} -Xsboard=intel_s10sx_pac:pac_s10 -fsycl-link $in -o $out + +# FPGA emulator +build fpga_emu: phony ${emulator_target} +build ${emulator_target}: build_fpga_emu ${source_file} + +# report +build report: phony ${report_target} +build ${report_target}: gen_report ${source_file} + +# report (S10 PAC) +build report_s10_pac: phony 
${report_target_s10_pac} +build ${report_target_s10_pac}: gen_report_s10_pac ${source_file} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/src/onchip_memory_cache.cpp b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/src/onchip_memory_cache.cpp new file mode 100755 index 0000000000..83b48eac97 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache/src/onchip_memory_cache.cpp @@ -0,0 +1,235 @@ +//============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +#include +#include +#include +#include "dpc_common.hpp" + +using namespace sycl; + +constexpr int kInitNumInputs = 16 * 1024 * 1024; // Default number of inputs. +constexpr int kNumOutputs = 64; // Number of outputs +constexpr int kInitSeed = 42; // Seed for randomizing data inputs +constexpr int kCacheDepth = 5; // Depth of the cache. +constexpr int kNumRuns = 2; // runs twice to show the impact of cache +constexpr double kNs = 1000000000.0; // number of nanoseconds in a second + +template +class Task; + +// This kernel function implements two data paths: with and without caching. +// use_cache specifies which path to take. 
+template +void Histogram(std::unique_ptr& q, buffer& input_buf, + buffer& output_buf, event& e) { + // Enqueue kernel + e = q->submit([&](handler& h) { + // Get accessors to the SYCL buffers + auto input = input_buf.get_access(h); + auto output = output_buf.get_access(h); + + h.single_task>([=]() [[intel::kernel_args_restrict]] { + + // On-chip memory for Histogram + uint32_t local_output[kNumOutputs]; + uint32_t local_output_with_cache[kNumOutputs]; + + // Register-based cache of recently-accessed memory locations + uint32_t last_sum[kCacheDepth + 1]; + uint32_t last_sum_index[kCacheDepth + 1]; + + // Initialize Histogram to zero + for (uint32_t b = 0; b < kNumOutputs; ++b) { + local_output[b] = 0; + local_output_with_cache[b] = 0; + } + + // Compute the Histogram + if (!use_cache) { // Without cache + for (uint32_t n = 0; n < kInitNumInputs; ++n) { + // Compute the Histogram index to increment + uint32_t b = input[n] % kNumOutputs; + local_output[b]++; + } + } else { // With cache + + // Specify that the minimum dependence-distance of + // loop carried variables is kCacheDepth. + [[intelfpga::ivdep(kCacheDepth)]] for (uint32_t n = 0; + n < kInitNumInputs; ++n) { + // Compute the Histogram index to increment + uint32_t b = input[n] % kNumOutputs; + + // Get the value from the on-chip mem at this index. + uint32_t val = local_output_with_cache[b]; + + // However, if this location in on-chip mem was recently + // written to, take the value from the cache. + #pragma unroll + for (int i = 0; i < kCacheDepth + 1; i++) { + if (last_sum_index[i] == b) val = last_sum[i]; + } + + // Write the new value to both the cache and the on-chip mem. + last_sum[kCacheDepth] = local_output_with_cache[b] = val + 1; + last_sum_index[kCacheDepth] = b; + + // Cache is just a shift register, so shift the shift reg. Pushing + // into the back of the shift reg is done above. 
+ #pragma unroll + for (int i = 0; i < kCacheDepth; i++) { + last_sum[i] = last_sum[i + 1]; + last_sum_index[i] = last_sum_index[i + 1]; + } + } + } + + // Write output to global memory + for (uint32_t b = 0; b < kNumOutputs; ++b) { + if (!use_cache) { + output[b] = local_output[b]; + } else { + output[b] = local_output_with_cache[b]; + } + } + }); + }); +} + +int main() { + // Host and kernel profiling + event e; + ulong t1_kernel, t2_kernel; + double time_kernel; + +// Create queue, get platform and device +#if defined(FPGA_EMULATOR) + intel::fpga_emulator_selector device_selector; + std::cout << "\nEmulator output does not demonstrate true hardware " + "performance. The design may need to run on actual hardware " + "to observe the performance benefit of the optimization " + "exemplified in this tutorial.\n\n"; +#else + intel::fpga_selector device_selector; +#endif + try { + auto prop_list = + property_list{property::queue::enable_profiling()}; + + std::unique_ptr q; + q.reset(new queue(device_selector, dpc_common::exception_handler, prop_list)); + + platform platform = q->get_context().get_platform(); + device device = q->get_device(); + std::cout << "Platform name: " + << platform.get_info().c_str() << "\n"; + std::cout << "Device name: " + << device.get_info().c_str() << "\n\n\n"; + + std::cout << "\nNumber of inputs: " << kInitNumInputs << "\n"; + std::cout << "Number of outputs: " << kNumOutputs << "\n\n"; + + // Create input and output buffers + auto input_buf = buffer(range<1>(kInitNumInputs)); + auto output_buf = buffer(range<1>(kNumOutputs)); + + srand(kInitSeed); + + // Compute the reference solution + uint32_t gold[kNumOutputs]; + + { + // Get host-side accessors to the SYCL buffers + auto input_host = input_buf.get_access(); + // Initialize random input + for (int i = 0; i < kInitNumInputs; ++i) { + input_host[i] = rand(); + } + + for (int b = 0; b < kNumOutputs; ++b) { + gold[b] = 0; + } + for (int i = 0; i < kInitNumInputs; ++i) { + int b = 
input_host[i] % kNumOutputs; + gold[b]++; + } + } + + // Host accessor is now out-of-scope and is destructed. This is required + // in order to unblock the kernel's subsequent accessor to the same buffer. + + for (int i = 0; i < kNumRuns; i++) { + switch (i) { + case 0: { + std::cout << "Beginning run without on-chip memory caching.\n\n"; + Histogram(q, input_buf, output_buf, e); + break; + } + case 1: { + std::cout << "Beginning run with on-chip memory caching.\n\n"; + Histogram(q, input_buf, output_buf, e); + break; + } + default: { + Histogram(q, input_buf, output_buf, e); + } + } + + // Wait for kernels to finish + q->wait(); + + // Compute kernel execution time + t1_kernel = e.get_profiling_info(); + t2_kernel = e.get_profiling_info(); + time_kernel = (t2_kernel - t1_kernel) / kNs; + + // Get accessor to output buffer. Accessing the buffer at this point in + // the code will block on kernel completion. + auto output_host = output_buf.get_access(); + + // Verify output and print pass/fail + bool passed = true; + int num_errors = 0; + for (int b = 0; b < kNumOutputs; b++) { + if (num_errors < 10 && output_host[b] != gold[b]) { + passed = false; + std::cout << " (mismatch, expected " << gold[b] << ")\n"; + num_errors++; + } + } + + if (passed) { + std::cout << "Verification PASSED\n\n"; + + // Report host execution time and throughput + std::cout.setf(std::ios::fixed); + double N_MB = (kInitNumInputs * sizeof(uint32_t)) / + (1024 * 1024); // Input size in MB + + // Report kernel execution time and throughput + std::cout << "Kernel execution time: " << time_kernel << " seconds\n"; + std::cout << "Kernel throughput " << (i == 0 ? 
"without" : "with") + << " caching: " << N_MB / time_kernel << " MB/s\n\n"; + } else { + std::cout << "Verification FAILED\n"; + return 1; + } + } + } catch (sycl::exception const& e) { + // Catches exceptions in the host code + std::cout << "Caught a SYCL host exception:\n" << e.what() << "\n"; + + // Most likely the runtime couldn't find FPGA hardware! + if (e.get_cl_code() == CL_DEVICE_NOT_FOUND) { + std::cout << "If you are targeting an FPGA, please ensure that your " + "system has a correctly configured FPGA board.\n"; + std::cout << "If you are targeting the FPGA emulator, compile with " + "-DFPGA_EMULATOR.\n"; + } + std::terminate(); + } + return 0; +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/CMakeLists.txt new file mode 100755 index 0000000000..09e703741b --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/CMakeLists.txt @@ -0,0 +1,11 @@ +set(CMAKE_CXX_COMPILER "dpcpp") + +cmake_minimum_required (VERSION 2.8) + +project(PipeArray) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +add_subdirectory (src) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/License.txt b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/License.txt new file mode 100755 index 0000000000..e63c6e13dc --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/License.txt @@ -0,0 +1,7 @@ +Copyright Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 
permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/README.md b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/README.md new file mode 100755 index 0000000000..d292d6465f --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/README.md @@ -0,0 +1,215 @@ + +# Data Transfers Using Pipe Arrays +This FPGA tutorial showcases a design pattern that makes it possible to create arrays of pipes. + +***Documentation***: The [oneAPI DPC++ FPGA Optimization Guide](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide) provides comprehensive instructions for targeting FPGAs through DPC++. The [oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide) is a general resource for target-independent DPC++ programming. + +| Optimized for | Description +--- |--- +| OS | Linux* Ubuntu* 18.04; Windows* 10 +| Hardware | Intel® Programmable Acceleration Card (PAC) with Intel Arria® 10 GX FPGA;
Intel® Programmable Acceleration Card (PAC) with Intel Stratix® 10 SX FPGA +| Software | Intel® oneAPI DPC++ Compiler (Beta)
Intel® FPGA Add-On for oneAPI Base Toolkit +| What you will learn | A design pattern to generate an array of pipes in DPC++
Static loop unrolling through template metaprogramming +| Time to complete | 15 minutes + +_Notice: Limited support in Windows*; compiling for FPGA hardware is not supported in Windows*_ + +## Purpose +In certain situations, it is useful to be able to create a collection of pipes that can be indexed like an array in a DPC++ FPGA design. If you are not yet familiar with DPC++ pipes, refer to the prerequisite tutorial "Data Transfers Using Pipes". + +In SYCL*, each pipe defines a unique type with static methods for reading data (`read`) and writing data (`write`). Since pipes are not objects but *types*, defining a collection of pipes requires C++ template meta-programming. This is somewhat non-intuitive but yields highly efficient code. + +This tutorial provides a convenient pair of header files defining an abstraction for an array of pipes. The headers can be used in any DPC++ design and can be extended as necessary. + +### Example 1: A simple array of pipes + +To create an array of pipes, include the top-level header (from this code sample) in your design: + +```c++ +#include "pipe_array.hpp" +``` + +As with regular pipes, an array of pipes needs template parameters for an ID, for the `min_capacity` of each pipe, and for the data type of each pipe. An array of pipes additionally requires one or more template parameters to specify the array size. The following code declares a one dimensional array of 10 pipes, each with `capacity=32`, that operate on `int` values. + +```c++ +using MyPipeArray = PipeArray< // Defined in "pipe_array.hpp". + class MyPipe, // An identifier for the pipe. + int, // The type of data in the pipe. + 32, // The capacity of each pipe. + 10 // array dimension. + >; +``` + +The uniqueness of a pipe array is derived from a combination of all template parameters. 
+ +Indexing inside a pipe array can be done via the `PipeArray::PipeAt` type alias, as shown in the following code snippet: + +```c++ +MyPipeArray::PipeAt<3>::write(17); +auto x = MyPipeArray::PipeAt<3>::read(); +``` +The template parameter `<3>` identifies a specific pipe within the array of pipes. The index of the pipe being accessed *must* be determinable at compile time. + +In most cases, we want to use an array of pipes so that we can iterate over them in a loop. In order to respect the requirement that all pipe indices are uniquely determinable at compile time, we must use a static form of loop unrolling based on C++ templates. A simple example is shown in the code snippet: + +```c++ +// Write 17 to every pipe in the array +Unroller<0, 10>::Step([](auto i) { + MyPipeArray::PipeAt<i>::write(17); +}); +``` +While this may initially feel foreign to those unaccustomed to C++ template metaprogramming, this is a simple and powerful pattern common to many C++ libraries. It is easy to reuse. In addition to `pipe_array.hpp`, this code sample includes a simple header file `unroller.hpp`, which implements the `Unroller` functionality. + +### Example 2: A 2D array of pipes + +This code sample defines a `Producer` kernel that reads data from host memory and forwards this data into a two dimensional pipe matrix. + +The following code snippet creates a two dimensional pipe array. +``` c++ +constexpr size_t kNumRows = 2; +constexpr size_t kNumCols = 2; +constexpr size_t kDepth = 2; + +using ProducerToConsumerPipeMatrix = PipeArray< // Defined in "pipe_array.hpp". + class ProducerConsumerPipe, // An identifier for the pipe. + uint64_t, // The type of data in the pipe. + kDepth, // The capacity of each pipe. + kNumRows, // array dimension. + kNumCols // array dimension. + >; +``` +The producer kernel writes `num_passes` units of data into each of the `kNumRows * kNumCols` pipes. Note that the unrollers' lambdas must capture certain variables from their outer scope. 
+ +```c++ +h.single_task([=]() { + size_t input_idx = 0; + for (size_t pass = 0; pass < num_passes; pass++) { + // Template-based unroll (outer "i" loop) + Unroller<0, kNumRows>::Step([&input_idx, input_accessor](auto i) { + // Template-based unroll (inner "j" loop) + Unroller<0, kNumCols>::Step([&input_idx, i, input_accessor](auto j) { + // Write a value to the pipe of the pipe array + ProducerToConsumerPipeMatrix::PipeAt<i, j>::write( + input_accessor[input_idx++]); + }); + }); + } +}); +``` + +The code sample also defines an array of `Consumer` kernels that each read from a unique pipe in `ProducerToConsumerPipeMatrix`, process the data, and write the result to the host memory. + +```c++ +// The consumer kernel reads from a single pipe, determined by consumer_id +h.single_task>([=]() { + constexpr size_t x = consumer_id / kNumCols; + constexpr size_t y = consumer_id % kNumCols; + for (size_t i = 0; i < num_elements; ++i) { + auto input = ProducerToConsumerPipeMatrix::PipeAt<x, y>::read(); + uint64_t answer = ConsumerWork(input); // do some processing + output_accessor[i] = answer; + } +}); +``` + +The host must thus enqueue the producer kernel and `kNumRows * kNumCols` separate consumer kernels. The latter is achieved through another static unroll. +```c++ +{ + queue q(device_selector, dpc_common::exception_handler); + + // Enqueue producer + buffer producer_buffer(producer_input); + Producer(q, producer_buffer); + + // Use template-based unroll to enqueue multiple consumers + std::vector> consumer_buffers; + Unroller<0, kNumberOfConsumers>::Step([&](auto consumer_id) { + consumer_buffers.emplace_back(consumer_output[consumer_id].data(), items_per_consumer); + Consumer(q, consumer_buffers.back()); + }); +} +``` + +## Key Concepts +* A design pattern to generate an array of pipes in DPC++ +* Static loop unrolling through template metaprogramming + +## License +This code sample is licensed under MIT license. 
+ + +## Building the `pipe_array` Tutorial + +### Include Files +The included header `dpc_common.hpp` is located at `%ONEAPI_ROOT%\dev-utilities\latest\include` on your development system. + +### Running Samples in DevCloud +If running a sample in the Intel DevCloud, remember that you must specify the compute node (fpga_compile or fpga_runtime) as well as whether to run in batch or interactive mode. For more information see the Intel® oneAPI Base Toolkit Get Started Guide ([https://devcloud.intel.com/oneapi/get-started/base-toolkit/](https://devcloud.intel.com/oneapi/get-started/base-toolkit/)). + +When compiling for FPGA hardware, it is recommended to increase the job timeout to 12h. + +### On a Linux* System + +1. Generate the `Makefile` by running `cmake`. + ``` + mkdir build + cd build + ``` + To compile for the Intel® PAC with Intel Arria® 10 GX FPGA, run `cmake` using the command: + ``` + cmake .. + ``` + Alternatively, to compile for the Intel® PAC with Intel Stratix® 10 SX FPGA, run `cmake` using the command: + + ``` + cmake .. -DFPGA_BOARD=intel_s10sx_pac:pac_s10 + ``` + +2. Compile the design through the generated `Makefile`. The following build targets are provided, matching the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + make fpga_emu + ``` + * Generate the optimization report: + ``` + make report + ``` + * Compile for FPGA hardware (longer compile time, targets FPGA device): + ``` + make fpga + ``` +3. (Optional) As the above hardware compile may take several hours to complete, an Intel® PAC with Intel Arria® 10 GX FPGA precompiled binary can be downloaded here. + + ### In Third-Party Integrated Development Environments (IDEs) + +You can compile and run this tutorial in the Eclipse* IDE (in Linux*). 
For instructions, refer to the following link: [Intel® oneAPI DPC++ FPGA Workflows on Third-Party IDEs](https://software.intel.com/en-us/articles/intel-oneapi-dpcpp-fpga-workflow-on-ide) + + +## Examining the Reports +Locate `report.html` in the `pipe_array_report.prj/reports/` or `pipe_array_s10_pac_report.prj/reports/` directory. Open the report in any of Chrome*, Firefox*, Edge*, or Internet Explorer*. + +You can visualize the kernels and pipes generated by looking at the "System Viewer" section of the report. However, it is recommended that you first reduce the array dimensions `kNumRows` and `kNumCols` to small values (2 or 3) to facilitate visualization. + +## Running the Sample + + 1. Run the sample on the FPGA emulator (the kernel executes on the CPU): + ``` + ./pipe_array.fpga_emu (Linux) + pipe_array.fpga_emu.exe (Windows) + ``` +2. Run the sample on the FPGA device: + ``` + ./pipe_array.fpga (Linux) + ``` + +### Example of Output +``` +Input Array Size: 1024 +Enqueuing producer... +Enqueuing consumer 0... +Enqueuing consumer 1... +Enqueuing consumer 2... +Enqueuing consumer 3... 
+PASSED: The results are correct +``` diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/pipe_array.sln b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/pipe_array.sln new file mode 100755 index 0000000000..efb4ff761f --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/pipe_array.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.28307.705 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pipe_array", "pipe_array.vcxproj", "{FA3FB2D1-BA98-4B4E-A8FA-A9BE6F8CA204}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {FA3FB2D1-BA98-4B4E-A8FA-A9BE6F8CA204}.Debug|x64.ActiveCfg = Debug|x64 + {FA3FB2D1-BA98-4B4E-A8FA-A9BE6F8CA204}.Debug|x64.Build.0 = Debug|x64 + {FA3FB2D1-BA98-4B4E-A8FA-A9BE6F8CA204}.Release|x64.ActiveCfg = Release|x64 + {FA3FB2D1-BA98-4B4E-A8FA-A9BE6F8CA204}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {936BD366-28EA-4A45-B5CF-EE6630694F28} + EndGlobalSection +EndGlobal diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/pipe_array.vcxproj b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/pipe_array.vcxproj new file mode 100755 index 0000000000..5ebc0c86e4 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/pipe_array.vcxproj @@ -0,0 +1,165 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + + + + + + + + + + + 15.0 + {fa3fb2d1-ba98-4b4e-a8fa-a9be6f8ca204} + Win32Proj + pipe_array + $(WindowsSDKVersion.Replace("\","")) + + + + Application + true + 
Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + + + + + + + + + + + + + + + + + + + true + + + true + + + false + + + false + + + + Use + Level3 + Disabled + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + Disabled + true + true + pch.h + true + -DFPGA_EMULATOR %(AdditionalOptions) + $(IntDir)pipe_array.obj + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + true + -DFPGA_EMULATOR %(AdditionalOptions) + $(IntDir)pipe_array.obj + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + + diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/sample.json b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/sample.json new file mode 100755 index 0000000000..047514cfcc --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/sample.json @@ -0,0 +1,51 @@ +{ + "guid": "11A61AF6-727E-4241-B5A0-CCCD0EF160B9", + "name": "Data Transfers Using Pipe Arrays", + "categories": ["Toolkit/Intel® oneAPI Base Toolkit/FPGA/Tutorials"], + "description": "FPGA tutorial showcasing a design pattern to enables the creation of arrays of pipes.", + "toolchain": ["dpcpp"], + "os": ["linux", "windows"], + "targetDevice": ["FPGA"], + "builder": ["ide", "cmake"], + "languages": [{"cpp":{}}], + "ciTests": { + "linux": [ + { + "id": "fpga_emu", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make fpga_emu", + "./pipe_array.fpga_emu" + ] + }, + { + "id": "report", + "steps": [ + "mkdir 
build", + "cd build", + "cmake ..", + "make report" + ] + } + ], + "windows": [ + { + "id": "fpga_emu", + "steps": [ + "cd src", + "ninja fpga_emu", + "pipe_array.fpga_emu.exe" + ] + }, + { + "id": "report", + "steps": [ + "cd src", + "ninja report" + ] + } + ] + } +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/src/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/src/CMakeLists.txt new file mode 100755 index 0000000000..0301dbed55 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/src/CMakeLists.txt @@ -0,0 +1,91 @@ +set(SOURCE_FILE pipe_array.cpp) +set(TARGET_NAME pipe_array) +set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu) +set(FPGA_TARGET ${TARGET_NAME}.fpga) + +# Intel supported FPGA Boards and their names +set(A10_PAC_BOARD_NAME "intel_a10gx_pac:pac_a10") +set(S10_PAC_BOARD_NAME "intel_s10sx_pac:pac_s10") + +# Assume target is the Intel(R) PAC with Intel Arria(R) 10 GX FPGA +SET(_FPGA_BOARD ${A10_PAC_BOARD_NAME}) + +# Check if target is the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA +IF (NOT DEFINED FPGA_BOARD) + MESSAGE(STATUS "\tFPGA_BOARD was not specified. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. Please refer to the README for more information on how to run the design on the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${A10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${S10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Stratix(R) 10 SX FPGA.") + SET(_FPGA_BOARD ${S10_PAC_BOARD_NAME}) + +ELSE() + MESSAGE(STATUS "\tAn invalid board name was passed in using the FPGA_BOARD flag. 
Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. Please refer to the README for the list of valid board names.") +ENDIF() + +set(HARDWARE_COMPILE_FLAGS "-fintelfpga") + +# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation +set(HARDWARE_LINK_FLAGS "-fintelfpga -Xshardware -Xsboard=${_FPGA_BOARD} ${USER_HARDWARE_FLAGS}") + +set(EMULATOR_COMPILE_FLAGS "-fintelfpga -DFPGA_EMULATOR") +set(EMULATOR_LINK_FLAGS "-fintelfpga") + +# fpga emulator +if(WIN32) + set(WIN_EMULATOR_TARGET ${EMULATOR_TARGET}.exe) + add_custom_target(fpga_emu DEPENDS ${WIN_EMULATOR_TARGET}) + separate_arguments(WIN_EMULATOR_COMPILE_FLAGS WINDOWS_COMMAND "${EMULATOR_COMPILE_FLAGS}") + add_custom_command(OUTPUT ${WIN_EMULATOR_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} ${WIN_EMULATOR_COMPILE_FLAGS} /GX ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${WIN_EMULATOR_TARGET} + DEPENDS ${SOURCE_FILE}) + +else() + add_executable(${EMULATOR_TARGET} ${SOURCE_FILE}) + add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS ${EMULATOR_COMPILE_FLAGS}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS ${EMULATOR_LINK_FLAGS}) +endif() + +# fpga +if(WIN32) + add_custom_target(fpga + COMMAND echo "FPGA hardware flow is not supported in Windows") +else() + add_executable(${FPGA_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILE}) + add_custom_target(fpga DEPENDS ${FPGA_TARGET}) + set_target_properties(${FPGA_TARGET} PROPERTIES COMPILE_FLAGS ${HARDWARE_COMPILE_FLAGS}) + set_target_properties(${FPGA_TARGET} PROPERTIES LINK_FLAGS ${HARDWARE_LINK_FLAGS}) +endif() + +# report +if(WIN32) + set(DEVICE_OBJ_FILE ${TARGET_NAME}_report.a) + add_custom_target(report DEPENDS ${DEVICE_OBJ_FILE}) + + separate_arguments(HARDWARE_LINK_FLAGS_LIST WINDOWS_COMMAND "${HARDWARE_LINK_FLAGS}") + add_custom_command(OUTPUT ${DEVICE_OBJ_FILE} + COMMAND 
${CMAKE_CXX_COMPILER} /EHsc ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${DEVICE_OBJ_FILE} + DEPENDS ${SOURCE_FILE}) + +else() + set(DEVICE_OBJ_FILE ${TARGET_NAME}_report.a) + add_custom_target(report DEPENDS ${DEVICE_OBJ_FILE}) + + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} ${SOURCE_FILE} COPYONLY) + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/pipe_array.hpp pipe_array.hpp COPYONLY) + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/unroller.hpp unroller.hpp COPYONLY) + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/pipe_array_internal.hpp pipe_array_internal.hpp COPYONLY) + + separate_arguments(HARDWARE_LINK_FLAGS_LIST UNIX_COMMAND "${HARDWARE_LINK_FLAGS}") + add_custom_command(OUTPUT ${DEVICE_OBJ_FILE} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link ${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${DEVICE_OBJ_FILE} + DEPENDS ${SOURCE_FILE} pipe_array.hpp unroller.hpp pipe_array_internal.hpp) +endif() + +# run +add_custom_target(run + COMMAND ../${TARGET_NAME}.fpga_emu + DEPENDS ${TARGET_NAME}.fpga_emu) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/src/build.ninja b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/src/build.ninja new file mode 100755 index 0000000000..3ea2cc86e1 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/src/build.ninja @@ -0,0 +1,30 @@ +source_file = pipe_array.cpp +target_name = pipe_array + +emulator_target = ${target_name}.fpga_emu.exe +report_target = ${target_name}_report.a +report_target_s10_pac = ${target_name}_s10_pac_report.a + +hardware_flags = -fintelfpga -Xshardware -std=c++14 +emulator_flags = -fintelfpga -DFPGA_EMULATOR -std=c++14 + +rule build_fpga_emu + command = dpcpp /GX ${emulator_flags} $in -o $out + +rule gen_report + command = dpcpp /GX ${hardware_flags} -Xsboard=intel_a10gx_pac:pac_a10 -fsycl-link $in -o $out + +rule 
gen_report_s10_pac + command = dpcpp /GX ${hardware_flags} -Xsboard=intel_s10sx_pac:pac_s10 -fsycl-link $in -o $out + +# FPGA emulator +build fpga_emu: phony ${emulator_target} +build ${emulator_target}: build_fpga_emu ${source_file} + +# report +build report: phony ${report_target} +build ${report_target}: gen_report ${source_file} + +# report (S10 PAC) +build report_s10_pac: phony ${report_target_s10_pac} +build ${report_target_s10_pac}: gen_report_s10_pac ${source_file} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/src/pipe_array.cpp b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/src/pipe_array.cpp new file mode 100755 index 0000000000..e5bcbbaec1 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/src/pipe_array.cpp @@ -0,0 +1,177 @@ +//============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +#include +#include +#include +#include +#include +#include "dpc_common.hpp" +#include "pipe_array.hpp" +#include "unroller.hpp" + +using namespace sycl; + +constexpr size_t kNumRows = 2; +constexpr size_t kNumCols = 2; +constexpr size_t kNumberOfConsumers = kNumRows * kNumCols; +constexpr size_t kDepth = 2; + +using ProducerToConsumerPipeMatrix = PipeArray< // Defined in "pipe_array.h". + class ProducerConsumerPipe, // An identifier for the pipe. + uint64_t, // The type of data in the pipe. + kDepth, // The capacity of each pipe. + kNumRows, // array dimension. + kNumCols // array dimension. + >; + +// Forward declaration of the kernel name +// (This will become unnecessary in a future compiler version.) 
+class ProducerTutorial; +template class ConsumerTutorial; + +void Producer(queue &q, buffer &input_buffer) { + std::cout << "Enqueuing producer...\n"; + + auto e = q.submit([&](handler &h) { + auto input_accessor = input_buffer.get_access(h); + auto num_elements = input_buffer.get_count(); + auto num_passes = num_elements / kNumberOfConsumers; + + // The producer kernel writes to every pipe in the 2D pipe array + h.single_task([=]() { + size_t input_idx = 0; + for (size_t pass = 0; pass < num_passes; pass++) { + // Template-based unroll (outer "i" loop) + Unroller<0, kNumRows>::Step([&input_idx, input_accessor](auto i) { + // Template-based unroll (inner "j" loop) + Unroller<0, kNumCols>::Step([&input_idx, &i, input_accessor](auto j) { + // Write a value to the pipe of the pipe array + ProducerToConsumerPipeMatrix::PipeAt::write( + input_accessor[input_idx++]); + }); + }); + } + }); + }); +} + +// Do some work on the data (any function could be substituted) +uint64_t ConsumerWork(uint64_t i) { return i * i; } + +template +void Consumer(queue &q, buffer &out_buf) { + std::cout << "Enqueuing consumer " << consumer_id << "...\n"; + + auto e = q.submit([&](handler &h) { + auto output_accessor = out_buf.get_access(h); + auto num_elements = out_buf.get_count(); + + // The consumer kernel reads from a single pipe, determined by consumer_id + h.single_task>([=]() { + constexpr size_t consumer_x = consumer_id / kNumCols; + constexpr size_t consumer_y = consumer_id % kNumCols; + for (size_t i = 0; i < num_elements; ++i) { + auto input = ProducerToConsumerPipeMatrix::PipeAt::read(); + uint64_t answer = ConsumerWork(input); + output_accessor[i] = answer; + } + }); + }); +} + +int main(int argc, char *argv[]) { + uint64_t array_size = 1; + array_size <<= 10; + + // Parse optional data size argument + if (argc > 1) { + std::string option(argv[1]); + if (option == "-h" || option == "--help") { + std::cout << "Usage: \n \n\nFAILED\n"; + return 1; + } else { + array_size = 
std::stoi(option); + } + } + + std::cout << "Input Array Size: " << array_size << "\n"; + + // Check input validity + if (array_size % kNumberOfConsumers != 0) { + std::cout << "Array size must be a multiple of the number of consumers! " + "Exiting...\n"; + return 0; + } + + // Set up producer input vector, and kNumberOfConsumers output vectors + uint64_t items_per_consumer = array_size / kNumberOfConsumers; + std::vector producer_input(array_size, -1); + std::array, kNumberOfConsumers> consumer_output; + + for (auto &output : consumer_output) + output.resize(items_per_consumer, -1); + + // Initialize producer input + for (size_t i = 0; i < array_size; i++) + producer_input[i] = i; + +#if defined(FPGA_EMULATOR) + intel::fpga_emulator_selector device_selector; +#else + intel::fpga_selector device_selector; +#endif + + try { + queue q(device_selector, dpc_common::exception_handler); + + // Enqueue producer + buffer producer_buffer(producer_input); + Producer(q, producer_buffer); + + // Use verbose SYCL 1.2 syntax for the output buffer. + // (This will become unnecessary in a future compiler version.) + std::vector> consumer_buffers; + + // Use template-based unroll to enqueue multiple consumers + Unroller<0, kNumberOfConsumers>::Step([&](auto consumer_id) { + consumer_buffers.emplace_back(consumer_output[consumer_id].data(), + items_per_consumer); + Consumer(q, consumer_buffers.back()); + }); + + } catch (sycl::exception const &e) { + // Catches exceptions in the host code + std::cout << "Caught a SYCL host exception:\n" << e.what() << "\n"; + + // Most likely the runtime couldn't find FPGA hardware! 
+ if (e.get_cl_code() == CL_DEVICE_NOT_FOUND) { + std::cout << "If you are targeting an FPGA, please ensure that your " + "system has a correctly configured FPGA board.\n"; + std::cout << "If you are targeting the FPGA emulator, compile with " + "-DFPGA_EMULATOR.\n"; + } + std::terminate(); + } + + // Verify result + for (size_t i = 0; i < items_per_consumer; ++i) { + for (size_t consumer = 0; consumer < kNumberOfConsumers; ++consumer) { + auto fpga_result = consumer_output[consumer][i]; + auto expected_result = ConsumerWork(kNumberOfConsumers * i + consumer); + if (fpga_result != expected_result) { + std::cout << "FAILED: The results are incorrect\n"; + std::cout << "On Input: " << kNumberOfConsumers * i + consumer + << " Expected: " << expected_result << " Got: " << fpga_result + << "\n"; + return 1; + } + } + } + + std::cout << "PASSED: The results are correct\n"; + return 0; +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/src/pipe_array.hpp b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/src/pipe_array.hpp new file mode 100755 index 0000000000..cbcefd36b8 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/src/pipe_array.hpp @@ -0,0 +1,33 @@ +//============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +#include +#include +#include + +#include "pipe_array_internal.hpp" + +template +struct PipeArray { + PipeArray() = delete; + + template + struct StructId; + + template + struct VerifyIndices { + static_assert(sizeof...(idxs) == sizeof...(dims), + "Indexing into a PipeArray requires as many indices as " + "dimensions of the PipeArray."); + static_assert(VerifierDimLayer::template VerifierIdxLayer< + idxs...>::IsValid(), + "Index out of bounds"); + using VerifiedPipe = + cl::sycl::intel::pipe, BaseTy, depth>; + }; + + template + using PipeAt = 
typename VerifyIndices::VerifiedPipe; +}; diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/src/pipe_array_internal.hpp b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/src/pipe_array_internal.hpp new file mode 100755 index 0000000000..1b62f667f2 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/src/pipe_array_internal.hpp @@ -0,0 +1,26 @@ +//============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= + +namespace { +template +struct VerifierDimLayer { + template + struct VerifierIdxLayer { + static constexpr bool IsValid() { + return idx1 < dim1 && + (VerifierDimLayer::template VerifierIdxLayer< + idxs...>::IsValid()); + } + }; +}; +template +struct VerifierDimLayer { + template + struct VerifierIdxLayer { + static constexpr bool IsValid() { return idx < dim; } + }; +}; +} // namespace diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/src/unroller.hpp b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/src/unroller.hpp new file mode 100755 index 0000000000..4bfb9422bd --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/pipe_array/src/unroller.hpp @@ -0,0 +1,15 @@ +//============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +template struct Unroller { + template static void Step(const Action &action) { + action(std::integral_constant()); + Unroller::Step(action); + } +}; + +template struct Unroller { + template static void Step(const Action &) {} +}; diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/CMakeLists.txt 
b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/CMakeLists.txt new file mode 100755 index 0000000000..367086979c --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/CMakeLists.txt @@ -0,0 +1,11 @@ +set(CMAKE_CXX_COMPILER "dpcpp") + +cmake_minimum_required (VERSION 2.8) + +project(RemoveLoopCarriedDependency) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +add_subdirectory (src) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/License.txt b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/License.txt new file mode 100755 index 0000000000..e63c6e13dc --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/License.txt @@ -0,0 +1,7 @@ +Copyright Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/README.md b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/README.md new file mode 100755 index 0000000000..37e8edeeaf --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/README.md @@ -0,0 +1,176 @@ +# Removing Loop Carried Dependencies +This tutorial demonstrates how to remove a loop-carried dependency to improve the performance of FPGA device code. + +***Documentation***: The [FPGA Optimization Guide](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide) provides comprehensive instructions for targeting FPGAs through DPC++. The [oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide) is a resource for general target-independent DPC++ programming. + +| Optimized for | Description +--- |--- +| OS | Linux* Ubuntu* 18.04; Windows* 10 +| Hardware | Intel® Programmable Acceleration Card (PAC) with Intel Arria® 10 GX FPGA;
Intel® Programmable Acceleration Card (PAC) with Intel Stratix® 10 SX FPGA +| Software | Intel® oneAPI DPC++ Compiler (Beta)
Intel® FPGA Add-On for oneAPI Base Toolkit +| What you will learn | A technique to remove loop carried dependencies from your FPGA device code, and when to apply it +| Time to complete | 25 minutes + +_Notice: Limited support in Windows*; compiling for FPGA hardware is not supported in Windows*_ + +## Purpose +This tutorial demonstrates how to remove a loop-carried dependency in FPGA device code. A snippet of the baseline unoptimized code (the `Unoptimized` function in `src/loop_carried_dependency.cpp`) is given below: + +``` +double sum = 0; +for (size_t i = 0; i < N; i++) { + for (size_t j = 0; j < N; j++) { + sum += a[i * N + j]; + } + sum += b[i]; +} +result[0] = sum; +``` + +In the unoptimized kernel, a sum is computed over two loops. The inner loop sums over the `a` data and the outer loop over the `b` data. Since the value `sum` is updated in both loops, this introduces a _loop carried dependency_ that causes the outer loop to be serialized, allowing only one invocation of the outer loop to be active at a time, which reduces performance. + +A snippet of the optimized code (the `Optimized` function in `src/loop_carried_dependency.cpp`) is given below, which removes the loop carried dependency on the `sum` variable: + +``` +double sum = 0; + +for (size_t i = 0; i < N; i++) { + // Step 1: Definition + double sum_2 = 0; + + // Step 2: Accumulation of array A values for one outer loop iteration + for (size_t j = 0; j < N; j++) { + sum_2 += a[i * N + j]; + } + + // Step 3: Addition of array B value for an outer loop iteration + sum += sum_2; + sum += b[i]; +} + +result[0] = sum; +``` + +The optimized kernel demonstrates the use of an independent variable `sum_2` that is not updated in the outer loop and removes the need to serialize the outer loop, which improves the performance. + +### When to Use This Technique +Look at the _Compiler Report > Throughput Analysis > Loop Analysis_ section in the reports. The report lists the II and details for each loop. 
The technique presented in this tutorial may be applicable if the _Brief Info_ of the loop shows _Serial exe: Data dependency_. The details pane may provide more information: +``` +* Iteration executed serially across _function.block_. Only a single loop iteration will execute inside this region due to data dependency on variable(s): + * sum (_filename:line_) +``` + +## Key Concepts +* Loop-carried dependencies, and their impact on FPGA DPC++ kernel performance +* An optimization technique to break loop-carried data dependencies in critical loops + +## License +This code sample is licensed under MIT license. + +## Building the `loop_carried_dependency` Tutorial + +### Include Files +The included header `dpc_common.hpp` is located at `%ONEAPI_ROOT%\dev-utilities\latest\include` on your development system. + +### Running Samples in DevCloud +If running a sample in the Intel DevCloud, remember that you must specify the compute node (fpga_compile or fpga_runtime) as well as whether to run in batch or interactive mode. For more information see the Intel® oneAPI Base Toolkit Get Started Guide ([https://devcloud.intel.com/oneapi/get-started/base-toolkit/](https://devcloud.intel.com/oneapi/get-started/base-toolkit/)). + +When compiling for FPGA hardware, it is recommended to increase the job timeout to 12h. + +### On a Linux* System + +1. Generate the `Makefile` by running `cmake`. + ``` + mkdir build + cd build + ``` + To compile for the Intel® PAC with Intel Arria® 10 GX FPGA, run `cmake` using the command: + ``` + cmake .. + ``` + Alternatively, to compile for the Intel® PAC with Intel Stratix® 10 SX FPGA, run `cmake` using the command: + + ``` + cmake .. -DFPGA_BOARD=intel_s10sx_pac:pac_s10 + ``` + +2. Compile the design through the generated `Makefile`.
The following build targets are provided, matching the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + make fpga_emu + ``` + * Generate the optimization report: + ``` + make report + ``` + * Compile for FPGA hardware (longer compile time, targets FPGA device): + ``` + make fpga + ``` +3. (Optional) As the above hardware compile may take several hours to complete, an Intel® PAC with Intel Arria® 10 GX FPGA precompiled binary can be downloaded here. + +### On a Windows* System +Note: `cmake` is not yet supported on Windows. A `build.ninja` file is provided instead. + +1. Enter the source file directory. + ``` + cd src + ``` + +2. Compile the design. The following build targets are provided, matching the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + ninja fpga_emu + ``` + + * Generate the optimization report: + + ``` + ninja report + ``` + If you are targeting Intel® PAC with Intel Stratix® 10 SX FPGA, instead use: + ``` + ninja report_s10_pac + ``` + * Compiling for FPGA hardware is not yet supported on Windows. + + +### In Third-Party Integrated Development Environments (IDEs) + +You can compile and run this tutorial in the Eclipse* IDE (in Linux*) and the Visual Studio* IDE (in Windows*). For instructions, refer to the following link: [Intel® oneAPI DPC++ FPGA Workflows on Third-Party IDEs](https://software.intel.com/en-us/articles/intel-oneapi-dpcpp-fpga-workflow-on-ide) + +## Examining the Reports +Locate `report.html` in the `loop_carried_dependency_report.prj/reports` or in `loop_carried_dependency_s10_pac_report.prj/reports` directory. Open the report in any of Chrome*, Firefox*, Edge*, or Internet Explorer*. + +Navigate to the _Loops Analysis_ view of the report (under _Throughput Analysis_) and observe that the loop in block `UnOptKernel.B1` is showing _Serial exe: Data dependency_. 
Click on the _source location_ field in the table to see the details for the loop. The maximum interleaving iterations of the loop is 1, as the loop is serialized. + +Now, observe that the loop in block `OptKernel.B1` is not marked as _Serialized_. The maximum interleaving iterations of the loop is now 12. + +## Running the Sample + + 1. Run the sample on the FPGA emulator (the kernel executes on the CPU): + ``` + ./loop_carried_dependency.fpga_emu (Linux) + loop_carried_dependency.fpga_emu.exe (Windows) + ``` +2. Run the sample on the FPGA device: + ``` + ./loop_carried_dependency.fpga (Linux) + ``` + +### Example of Output +``` +Number of elements: 16000 +Run: Unoptimized: +kernel time : 10685.3 ms +Run: Optimized: +kernel time : 2736.47 ms +PASSED +``` +### Discussion of Results + +In the tutorial example, applying the optimization yields a total execution time reduction by almost a factor of 4. The Initiation Interval (II) for the inner loop is 12 because a double-precision floating-point add takes 11 cycles on the FPGA.
+ + diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/loop_carried_dependency.sln b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/loop_carried_dependency.sln new file mode 100755 index 0000000000..b319c23b37 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/loop_carried_dependency.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.28307.705 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "loop_carried_dependency", "loop_carried_dependency.vcxproj", "{49E7063B-56DA-4ACF-B153-5B56A98645BE}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {49E7063B-56DA-4ACF-B153-5B56A98645BE}.Debug|x64.ActiveCfg = Debug|x64 + {49E7063B-56DA-4ACF-B153-5B56A98645BE}.Debug|x64.Build.0 = Debug|x64 + {49E7063B-56DA-4ACF-B153-5B56A98645BE}.Release|x64.ActiveCfg = Release|x64 + {49E7063B-56DA-4ACF-B153-5B56A98645BE}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {CC320E26-0D79-434A-8E69-3F09BFB2FCF4} + EndGlobalSection +EndGlobal diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/loop_carried_dependency.vcxproj b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/loop_carried_dependency.vcxproj new file mode 100755 index 0000000000..0ef4b0a338 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/loop_carried_dependency.vcxproj @@ -0,0 +1,160 @@ + + + + + Debug + x64 
+ + + Release + x64 + + + + + + + + + + 15.0 + {49e7063b-56da-4acf-b153-5b56a98645be} + Win32Proj + loop_carried_dependency + $(WindowsSDKVersion.Replace("\","")) + + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + + + + + + + + + + + + + + + + + + + true + + + true + + + false + + + false + + + + Use + Level3 + Disabled + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + Disabled + true + true + pch.h + true + -DFPGA_EMULATOR %(AdditionalOptions) + $(IntDir)loop_carried_dependency.obj + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + true + -DFPGA_EMULATOR %(AdditionalOptions) + $(IntDir)loop_carried_dependency.obj + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + + diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/sample.json b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/sample.json new file mode 100755 index 0000000000..de8f0bb430 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/sample.json @@ -0,0 +1,51 @@ +{ + "guid": "E5C1C1FA-7FDB-4C09-8096-1812080FD6D5", + "name": "Removing Loop Carried Dependencies", + "categories": ["Toolkit/Intel® oneAPI Base Toolkit/FPGA/Tutorials"], + "description": "FPGA tutorial design demonstrating performance optimization by removing loop carried dependencies", + "toolchain": ["dpcpp"], + "os": ["linux", "windows"], + 
"targetDevice": ["FPGA"], + "builder": ["ide", "cmake"], + "languages": [{"cpp":{}}], + "ciTests": { + "linux": [ + { + "id": "fpga_emu", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make fpga_emu", + "./loop_carried_dependency.fpga_emu" + ] + }, + { + "id": "report", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make report" + ] + } + ], + "windows": [ + { + "id": "fpga_emu", + "steps": [ + "cd src", + "ninja fpga_emu", + "loop_carried_dependency.fpga_emu.exe" + ] + }, + { + "id": "report", + "steps": [ + "cd src", + "ninja report" + ] + } + ] + } +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/src/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/src/CMakeLists.txt new file mode 100755 index 0000000000..e194b6f754 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/src/CMakeLists.txt @@ -0,0 +1,88 @@ +set(SOURCE_FILE loop_carried_dependency.cpp) +set(TARGET_NAME loop_carried_dependency) + +set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu) +set(FPGA_TARGET ${TARGET_NAME}.fpga) + +# Intel supported FPGA Boards and their names +set(A10_PAC_BOARD_NAME "intel_a10gx_pac:pac_a10") +set(S10_PAC_BOARD_NAME "intel_s10sx_pac:pac_s10") + +# Assume target is the Intel(R) PAC with Intel Arria(R) 10 GX FPGA +SET(_FPGA_BOARD ${A10_PAC_BOARD_NAME}) + +# Check if target is the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA +IF (NOT DEFINED FPGA_BOARD) + MESSAGE(STATUS "\tFPGA_BOARD was not specified. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. 
Please refer to the README for more information on how to run the design on the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${A10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${S10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Stratix(R) 10 SX FPGA.") + SET(_FPGA_BOARD ${S10_PAC_BOARD_NAME}) + +ELSE() + MESSAGE(STATUS "\tAn invalid board name was passed in using the FPGA_BOARD flag. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. Please refer to the README for the list of valid board names.") +ENDIF() + +set(HARDWARE_COMPILE_FLAGS "-fintelfpga") + +# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation +set(HARDWARE_LINK_FLAGS "-fintelfpga -Xshardware -Xsboard=${_FPGA_BOARD} ${USER_HARDWARE_FLAGS}") + +set(EMULATOR_COMPILE_FLAGS "-fintelfpga -DFPGA_EMULATOR") +set(EMULATOR_LINK_FLAGS "-fintelfpga") + +# fpga emulator +if(WIN32) + set(WIN_EMULATOR_TARGET ${EMULATOR_TARGET}.exe) + add_custom_target(fpga_emu DEPENDS ${WIN_EMULATOR_TARGET}) + separate_arguments(WIN_EMULATOR_COMPILE_FLAGS WINDOWS_COMMAND "${EMULATOR_COMPILE_FLAGS}") + add_custom_command(OUTPUT ${WIN_EMULATOR_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} ${WIN_EMULATOR_COMPILE_FLAGS} /GX ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${WIN_EMULATOR_TARGET} + DEPENDS ${SOURCE_FILE}) +else() + add_executable(${EMULATOR_TARGET} ${SOURCE_FILE}) + add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS ${EMULATOR_COMPILE_FLAGS}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS ${EMULATOR_LINK_FLAGS}) +endif() + +# fpga +if(WIN32) + add_custom_target(fpga + COMMAND 
echo "FPGA hardware flow is not supported in Windows") +else() + add_executable(${FPGA_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILE}) + add_custom_target(fpga DEPENDS ${FPGA_TARGET}) + set_target_properties(${FPGA_TARGET} PROPERTIES COMPILE_FLAGS ${HARDWARE_COMPILE_FLAGS}) + set_target_properties(${FPGA_TARGET} PROPERTIES LINK_FLAGS ${HARDWARE_LINK_FLAGS}) +endif() + +# generate report +if(WIN32) + set(DEVICE_OBJ_FILE ${TARGET_NAME}_report.a) + add_custom_target(report DEPENDS ${DEVICE_OBJ_FILE}) + + separate_arguments(HARDWARE_LINK_FLAGS_LIST WINDOWS_COMMAND "${HARDWARE_LINK_FLAGS}") + add_custom_command(OUTPUT ${DEVICE_OBJ_FILE} + COMMAND ${CMAKE_CXX_COMPILER} /EHsc ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${DEVICE_OBJ_FILE} + DEPENDS ${SOURCE_FILE}) + +else() + set(DEVICE_OBJ_FILE ${TARGET_NAME}_report.a) + add_custom_target(report DEPENDS ${DEVICE_OBJ_FILE}) + + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} ${SOURCE_FILE} COPYONLY) + + separate_arguments(HARDWARE_LINK_FLAGS_LIST UNIX_COMMAND "${HARDWARE_LINK_FLAGS}") + add_custom_command(OUTPUT ${DEVICE_OBJ_FILE} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link ${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${DEVICE_OBJ_FILE} + DEPENDS ${SOURCE_FILE}) +endif() + +# run +add_custom_target(run + COMMAND ../${TARGET_NAME}.fpga_emu + DEPENDS ${TARGET_NAME}.fpga_emu) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/src/build.ninja b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/src/build.ninja new file mode 100755 index 0000000000..fbbdd87caf --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/src/build.ninja @@ -0,0 +1,30 @@ +source_file = loop_carried_dependency.cpp +target_name = loop_carried_dependency + +emulator_target = ${target_name}.fpga_emu.exe 
+report_target = ${target_name}_report.a +report_target_s10_pac = ${target_name}_s10_pac_report.a + +hardware_flags = -fintelfpga -Xshardware +emulator_flags = -fintelfpga -DFPGA_EMULATOR + +rule build_fpga_emu + command = dpcpp /GX ${emulator_flags} $in -o $out + +rule gen_report + command = dpcpp /GX ${hardware_flags} -Xsboard=intel_a10gx_pac:pac_a10 -fsycl-link $in -o $out + +rule gen_report_s10_pac + command = dpcpp /GX ${hardware_flags} -Xsboard=intel_s10sx_pac:pac_s10 -fsycl-link $in -o $out + +# FPGA emulator +build fpga_emu: phony ${emulator_target} +build ${emulator_target}: build_fpga_emu ${source_file} + +# report +build report: phony ${report_target} +build ${report_target}: gen_report ${source_file} + +# report (S10 PAC) +build report_s10_pac: phony ${report_target_s10_pac} +build ${report_target_s10_pac}: gen_report_s10_pac ${source_file} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/src/loop_carried_dependency.cpp b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/src/loop_carried_dependency.cpp new file mode 100755 index 0000000000..ab391a42c5 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency/src/loop_carried_dependency.cpp @@ -0,0 +1,174 @@ +//============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +#include +#include +#include "dpc_common.hpp" + +using namespace sycl; +using namespace std; + +// Forward declare the kernel names +// (This will become unnecessary in a future compiler version.) 
+class UnOptKernel; +class OptKernel; + +event Unoptimized(queue &q, const vector &vec_a, + const vector &vec_b, double &result, size_t N) { + buffer b_a(vec_a); + buffer b_b(vec_b); + buffer b_result(&result, range(1)); + + auto e = q.submit([&](handler &h) { + auto a = b_a.get_access(h); + auto b = b_b.get_access(h); + auto result = b_result.get_access(h); + + h.single_task([=]() { + double sum = 0; + for (size_t i = 0; i < N; i++) { + for (size_t j = 0; j < N; j++) { + sum += a[i * N + j]; + } + sum += b[i]; + } + result[0] = sum; + }); + }); + return e; +} + +event Optimized(queue &q, const vector &vec_a, + const vector &vec_b, double &result, size_t N) { + buffer b_a(vec_a); + buffer b_b(vec_b); + buffer b_result(&result, range(1)); + + auto e = q.submit([&](handler &h) { + auto a = b_a.get_access(h); + auto b = b_b.get_access(h); + auto result = b_result.get_access(h); + + h.single_task([=]() [[intel::kernel_args_restrict]] { + double sum = 0; + + for (size_t i = 0; i < N; i++) { + // Step 1: Definition + double sum_2 = 0; + + // Step 2: Accumulation of array A values for one outer loop iteration + for (size_t j = 0; j < N; j++) { + sum_2 += a[i * N + j]; + } + + // Step 3: Addition of array B value for an outer loop iteration + sum += sum_2; + sum += b[i]; + } + + result[0] = sum; + }); + }); + return e; +} + +void PrintTime(const event &e, queue &q, const char *kind) { + double start_k = e.get_profiling_info(); + double end_k = e.get_profiling_info(); + double kernel_time = (double)(end_k - start_k) * 1e-6; // profiling timestamps are in ns; convert to ms + + cout << "Run: " << kind << ":\n"; + cout << "kernel time : " << kernel_time << " ms\n"; +} + +int main(int argc, char *argv[]) { + size_t n = 16000; + + if (argc > 1) { + string option(argv[1]); + if (option == "-h" || option == "--help") { + cout << "Usage: \n\nFAILED\n"; + return 1; + } else { + try { n = stoi(option); } catch (const std::exception &) { cout << "Invalid data size: " << option << "\nFAILED\n"; return 1; } // stoi throws on non-numeric or out-of-range input + } + } + // Cap the value of n. 
+ n = std::max(std::min((size_t)n, (size_t)16000), (size_t)100); + cout << "Number of elements: " << n << '\n'; + + vector vec_a(n * n); + vector vec_b(n); + + double answer = 0; + + // initialize data and compute golden result + for (size_t i = 0; i < n; i++) { + for (size_t j = 0; j < n; j++) { + vec_a[i * n + j] = i + j; + answer += i + j; + } + vec_b[i] = i; + answer += i; + } + + // Initialize queue with device selector and enabling profiling + // Create queue, get platform and device +#if defined(FPGA_EMULATOR) + intel::fpga_emulator_selector selector; + cout << "\nEmulator output does not demonstrate true hardware " + "performance. The design may need to run on actual hardware " + "to observe the performance benefit of the optimization " + "exemplified in this tutorial.\n\n"; +#else + intel::fpga_selector selector; +#endif + + double unopt_sum = -1, opt_sum = -1; + + try { + // Create a profiling queue + queue q(selector, dpc_common::exception_handler, + property::queue::enable_profiling{}); + + // compute result on device + PrintTime(Unoptimized(q, vec_a, vec_b, unopt_sum, n), q, "Unoptimized"); + PrintTime(Optimized(q, vec_a, vec_b, opt_sum, n), q, "Optimized"); + + // q's destructor invokes q's exception handler on any device exceptions. + } catch (sycl::exception const &e) { + // Catches exceptions in the host code + std::cout << "Caught a SYCL host exception:\n" << e.what() << "\n"; + + // Most likely the runtime couldn't find FPGA hardware! 
+ if (e.get_cl_code() == CL_DEVICE_NOT_FOUND) { + std::cout << "If you are targeting an FPGA, please ensure that your " + "system has a correctly configured FPGA board.\n"; + std::cout << "If you are targeting the FPGA emulator, compile with " + "-DFPGA_EMULATOR.\n"; + } + std::terminate(); + } + + // Check the results + bool failed = false; + if (unopt_sum != answer) { + cout << "Unoptimized: expected: " << answer << ", result: " << unopt_sum + << '\n'; + failed = true; + } + if (opt_sum != answer) { + cout << "Optimized: expected: " << answer << ", result: " << opt_sum + << '\n'; + failed = true; + } + + if (failed) { + cout << "FAILED\n"; + return 1; + } + cout << "PASSED\n"; + return 0; +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/CMakeLists.txt new file mode 100755 index 0000000000..54283f46f7 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/CMakeLists.txt @@ -0,0 +1,11 @@ +set(CMAKE_CXX_COMPILER "dpcpp") + +cmake_minimum_required (VERSION 2.8) + +project(TriangularLoop) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +add_subdirectory (src) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/License.txt b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/License.txt new file mode 100755 index 0000000000..e63c6e13dc --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/License.txt @@ -0,0 +1,7 @@ +Copyright Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, 
publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/README.md b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/README.md new file mode 100755 index 0000000000..ad945c0d27 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/README.md @@ -0,0 +1,295 @@ + +# Triangular Loop Optimization + +This FPGA tutorial demonstrates an advanced technique to improve the performance of nested triangular loops with loop-carried dependencies in single-task kernels. + +***Documentation***: The [FPGA Optimization Guide](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide) provides comprehensive instructions for targeting FPGAs through DPC++. The [oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide) is a resource for general target-independent DPC++ programming. + +| Optimized for | Description +--- |--- +| OS | Linux* Ubuntu* 18.04; Windows* 10 +| Hardware | Intel® Programmable Acceleration Card (PAC) with Intel Arria® 10 GX FPGA;
Intel® Programmable Acceleration Card (PAC) with Intel Stratix® 10 SX FPGA +| Software | Intel® oneAPI DPC++ Compiler (Beta)
Intel® FPGA Add-On for oneAPI Base Toolkit +| What you will learn | How and when to apply the triangular loop optimization technique +| Time to complete | 30 minutes + +_Notice: Limited support in Windows*; compiling for FPGA hardware is not supported in Windows*_ + +## Purpose + +This FPGA tutorial introduces an advanced optimization technique to improve the performance of nested triangular loops with loop-carried dependencies. Such structures are challenging to optimize because of the time-varying loop trip count. + +### What is a triangular loop? + +A triangular loop is a loop nest where the inner-loop range depends on the outer loop variable in such a way that the inner-loop trip-count shrinks or grows. This is best explained with an example: + +```c++ + for (int x = 0; x < n; x++) { + for (int y = x + 1; y < n; y++) { + local_buf[y] = local_buf[y] + SomethingComplicated(local_buf[x]); + } + } +``` + +In this example, the inner-loop executes fewer and fewer iterations as overall execution progresses. Each iteration of the inner-loop performs a read from index `[x]` and a read-modify-write on indices `[y]=x+1` to `[y]=n-1`. Expressed graphically (with _n_=10), these operations look like: + +```c++ + y=0 1 2 3 4 5 6 7 8 9 +========================== +x=0 o x x x x x x x x x +x=1 o x x x x x x x x +x=2 o x x x x x x x +x=3 o x x x x x x +x=4 o x x x x x +x=5 o x x x x +x=6 o x x x +x=7 o x x +x=8 o x +x=9 + +Legend: read="o", read-modify-write="x" +``` + +The picture is triangular in shape, hence the name "triangular loop". + +### Performance challenge + +In the above example, the table shows that in outer-loop iteration `x=0`, the program reads `local_buf[x=0]` and reads, modifies, and writes the values from `local_buf[y=1]` through `local_buf[y=9]`. This pattern of memory accesses results in a loop-carried dependency across the outer loop iterations. For example, the read at `x=2` depends on the value that was written at `x=1,y=2`. 
+ +Generally, a new iteration is launched on every cycle as long as a sufficient number of inner-loop +iterations are executed *between* any two iterations that are dependent on one another. + +However, the challenge in the triangular loop pattern is that the trip-count of the inner-loop +progressively shrinks as `x` increments. In the worst case of `x=7`, the program writes to `local_buf[y=8]` in the first `y` iteration, but has only one intervening `y` iteration at `y=9` before the value must be read again at `x=8,y=8`. This may not allow enough time for the write operation to complete. The compiler compensates for this by increasing the initiation interval (II) of the inner-loop to allow more time to elapse between iterations. Unfortunately, this reduces the throughput of the inner-loop by a factor of II. + +A key observation is that this increased II is only functionally necessary when the inner-loop trip-count becomes small. Furthermore, the II of a loop is static -- it applies to all invocations of that loop. Therefore, if the *outer-loop* trip-count (_n_) is large, then most of the invocations of the inner-loop unnecessarily suffer the aforementioned throughput degradation. The optimization technique demonstrated in this tutorial addresses this issue. + +### Optimization concept + +The triangular loop optimization alters the code to guarantee that the trip count never falls below some minimum (_M_). This is accomplished by executing extra 'dummy' iterations of the inner loop when the *true* trip count falls below _M_. + +The purpose of the dummy iterations is to allow extra time for the loop-carried dependency to resolve. No actual computation takes place, and no side effects occur, during these added iterations. Note that the extra iterations are only executed on inner loop invocations that require them. When the inner-loop trip count is large, extra iterations are not needed. + +This technique allows the compiler to achieve II=1. 
+ +Applying the triangular loop optimization to the original example, the post-optimization execution graph for _M_=6 (with _n_=10) appears as follows: + +```c++ + y=0 1 2 3 4 5 6 7 8 9 +========================== +x=0 o x x x x x x x x x +x=1 o x x x x x x x x +x=2 o x x x x x x x +x=3 o x x x x x x +x=4 o x x x x x +x=5 - o x x x x +x=6 - - o x x x +x=7 - - - o x x +x=8 - - - - o x +x=9 + <---M=6---> + +Legend: read="o", read-modify-write="x", dummy iteration="-" +``` + +### Selecting the value of _M_ + +The objective is to find the minimal value of _M_ that enables the compiler to achieve an II of 1. Any value of _M_ larger than this minimum adds unnecessary latency to the computation. + +A good starting point of the value of _M_ is the II of the unoptimized inner loop, which can be found in the "Loop Analysis" report of the unoptimized code. If the compiler can achieve II=1 with this starting value, experiment with reducing _M_ until II increases. If the compiler does not achieve II=1, increase _M_ until it does. This search for the optimal _M_ can be done quickly, as the compiler takes little time to generate the static optimization report. + +### Applying the optimization in code + +Here is the triangular loop optimization of the original code snippet: +```c++ +// Indices to track the execution in the merged loop +int x = 0, y = 1; + +// Total iterations of the merged loop +const int loop_bound = TotalIterations(M, n); + +[[intelfpga::ivdep(M)]] +for (int i = 0; i < loop_bound; i++) { + + // Determine if this is a real or dummy iteration + bool compute = y > x; + if (compute) { + local_buf[y] = local_buf[y] + SomethingComplicated(local_buf[x]); + } + + y++; + if (y == n) { + x++; + y = Min(n - M, x + 1); + } +} +``` +This requires some explanation! + +***Single loop:*** Notice that the original nested loop has been manually coalesced or "merged" into a single loop. 
The explicit `x` and `y` induction variables are employed to achieve the triangular iteration pattern. The actual computation inside the loop is guarded by the condition `y > x`. + +***Merged loop trip count:*** The total trip-count of this merged loop is `loop_bound` in the snippet. The value of `loop_bound` is the total number of iterations in the execution graph diagram, which is a function of _n_ and _M_. + +To derive the expression for `TotalIterations(M, n)`, consider the iterations as consisting of the following two triangles of "real" and "dummy" iterations. + +```c++ + y=0 1 2 3 4 5 6 7 8 9 y=0 1 2 3 4 5 6 7 8 9 +========================= ========================= +x=0 o x x x x x x x x x x=0 +x=1 o x x x x x x x x x=1 +x=2 o x x x x x x x x=2 +x=3 o x x x x x x x=3 +x=4 o x x x x x + x=4 +x=5 o x x x x x=5 - +x=6 o x x x x=6 - - +x=7 o x x x=7 - - - +x=8 o x x=8 - - - - +x=9 + <(M-2)> + <---M=6---> +``` +The number of "real" iterations on the left is 10+9+8+7+6+5+4+3+2 = 54. The sum of a +descending series from `n` down to 1 is `n*(n+1)/2`. Since there is no iteration at `x=9,y=9`, subtract 1 (i.e., `n*(n+1)/2 - 1`). When _n_=10, this expression yields 54, as expected. + +The number of dummy iterations on the right is 4+3+2+1 = 10. The largest number in this series is _M_-2. Using the same formula for a descending series, you get `(M-2)*(M-1)/2`. For _M_=6, this expression yields 4*5/2 = 10, as expected. + +Summing the number of real and dummy iterations gives the total iterations of the merged loop. + +***Use of ivdep***: Since the loop is restructured to ensure that a minimum of M iterations are executed, the `[[intelfpga::ivdep(M)]]` attribute is used to hint to the compiler that iterations with dependencies are always separated by at least M iterations. 
+ + + +## Key Concepts +* The triangular loop advanced optimization technique, and situations in which it is applicable +* Using `ivdep safelen` to convey the broken loop-carried dependency to the compiler + +## License +This code sample is licensed under MIT license. + + +## Building the `triangular_loop` Tutorial + +### Include Files +The included header `dpc_common.hpp` is located at `%ONEAPI_ROOT%\dev-utilities\latest\include` on your development system. + +### Running Samples in DevCloud +If running a sample in the Intel DevCloud, remember that you must specify the compute node (fpga_compile or fpga_runtime) as well as whether to run in batch or interactive mode. For more information see the Intel® oneAPI Base Toolkit Get Started Guide ([https://devcloud.intel.com/oneapi/get-started/base-toolkit/](https://devcloud.intel.com/oneapi/get-started/base-toolkit/)). + +When compiling for FPGA hardware, it is recommended to increase the job timeout to 12h. + +### On a Linux* System + +1. Generate the `Makefile` by running `cmake`. + ``` + mkdir build + cd build + ``` + To compile for the Intel® PAC with Intel Arria® 10 GX FPGA, run `cmake` using the command: + ``` + cmake .. + ``` + Alternatively, to compile for the Intel® PAC with Intel Stratix® 10 SX FPGA, run `cmake` using the command: + + ``` + cmake .. -DFPGA_BOARD=intel_s10sx_pac:pac_s10 + ``` + +2. Compile the design through the generated `Makefile`. The following build targets are provided, matching the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + make fpga_emu + ``` + * Generate the optimization report: + ``` + make report + ``` + * Compile for FPGA hardware (longer compile time, targets FPGA device): + ``` + make fpga + ``` +3. (Optional) As the above hardware compile may take several hours to complete, an Intel® PAC with Intel Arria® 10 GX FPGA precompiled binary can be downloaded here. 
+ +### On a Windows* System +Note: `cmake` is not yet supported on Windows. A build.ninja file is provided instead. + +1. Enter the source file directory. + ``` + cd src + ``` + +2. Compile the design. The following build targets are provided, matching the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + ninja fpga_emu + ``` + + * Generate the optimization report: + + ``` + ninja report + ``` + If you are targeting Intel® PAC with Intel Stratix® 10 SX FPGA, instead use: + ``` + ninja report_s10_pac + ``` + * Compiling for FPGA hardware is not yet supported on Windows. + + ### In Third-Party Integrated Development Environments (IDEs) + +You can compile and run this tutorial in the Eclipse* IDE (in Linux*) and the Visual Studio* IDE (in Windows*). For instructions, refer to the following link: [Intel® oneAPI DPC++ FPGA Workflows on Third-Party IDEs](https://software.intel.com/en-us/articles/intel-oneapi-dpcpp-fpga-workflow-on-ide) + +## Examining the Reports +Locate `report.html` in the `triangular_loop_report.prj/reports/` or `triangular_loop_s10_pac_report.prj/reports/` directory. Open the report in any of Chrome*, Firefox*, Edge*, or Internet Explorer*. + +Consult the "Loop Analysis" report to compare the optimized and unoptimized versions of the loop. + + +## Running the Sample + + 1. Run the sample on the FPGA emulator (the kernel executes on the CPU): + ``` + ./triangular_loop.fpga_emu (Linux) + triangular_loop.fpga_emu.exe (Windows) + ``` +2. Run the sample on the FPGA device: + ``` + ./triangular_loop.fpga (Linux) + ``` + +### Example of Output + +``` +Platform name: Intel(R) FPGA SDK for OpenCL(TM) +Device name: pac_a10 : Intel PAC Platform (pac_ec00000) + + +Length of input array: 8192 + +Beginning run without triangular loop optimization. 
+ +Verification PASSED + +Execution time: 4.240185 seconds +Throughput without optimization: 30.187364 MB/s + +Beginning run with triangular loop optimization. + +Verification PASSED + +Execution time: 0.141516 seconds +Throughput with optimization: 904.489876 MB/s + +``` + +### Discussion of Results +A test compile of this tutorial design achieved an fMAX of approximately 210 MHz on the Intel® Programmable Acceleration Card with Intel® Arria® 10 GX FPGA. The results with and without the optimization are shown in the following table: + +Configuration | Overall Execution Time (ms) | Throughput (MB/s) +-|-|- +Without optimization | 4972 | 25.7 +With optimization | 161 | 796.6 + +Without optimization, the compiler achieved an II of 30 on the inner-loop. With the optimization, the compiler achieves an II of 1 and the throughput increased by approximately 30x. + diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/sample.json b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/sample.json new file mode 100755 index 0000000000..7dc1d09170 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/sample.json @@ -0,0 +1,51 @@ +{ + "guid": "884439A5-0286-447B-9E6D-A7C22B61CED8", + "name": "Triangular Loop Optimization", + "categories": ["Toolkit/Intel® oneAPI Base Toolkit/FPGA/Tutorials"], + "description": "FPGA tutorial demonstrating an advanced optimization technique for triangular loops", + "toolchain": ["dpcpp"], + "os": ["linux", "windows"], + "targetDevice": ["FPGA"], + "builder": ["ide", "cmake"], + "languages": [{"cpp":{}}], + "ciTests": { + "linux": [ + { + "id": "fpga_emu", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make fpga_emu", + "./triangular_loop.fpga_emu" + ] + }, + { + "id": "report", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make report" + ] + } + ], + "windows": [ + { + "id": "fpga_emu", + "steps": [ + "cd src", + "ninja fpga_emu", + 
"triangular_loop.fpga_emu.exe" + ] + }, + { + "id": "report", + "steps": [ + "cd src", + "ninja report" + ] + } + ] + } +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/src/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/src/CMakeLists.txt new file mode 100755 index 0000000000..04d6c7add8 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/src/CMakeLists.txt @@ -0,0 +1,88 @@ +set(SOURCE_FILE triangular_loop.cpp) +set(TARGET_NAME triangular_loop) + +set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu) +set(FPGA_TARGET ${TARGET_NAME}.fpga) + +# Intel supported FPGA Boards and their names +set(A10_PAC_BOARD_NAME "intel_a10gx_pac:pac_a10") +set(S10_PAC_BOARD_NAME "intel_s10sx_pac:pac_s10") + +# Assume target is the Intel(R) PAC with Intel Arria(R) 10 GX FPGA +SET(_FPGA_BOARD ${A10_PAC_BOARD_NAME}) + +# Check if target is the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA +IF (NOT DEFINED FPGA_BOARD) + MESSAGE(STATUS "\tFPGA_BOARD was not specified. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. Please refer to the README for more information on how to run the design on the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${A10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${S10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Stratix(R) 10 SX FPGA.") + SET(_FPGA_BOARD ${S10_PAC_BOARD_NAME}) + +ELSE() + MESSAGE(STATUS "\tAn invalid board name was passed in using the FPGA_BOARD flag. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. 
Please refer to the README for the list of valid board names.") +ENDIF() + +set(HARDWARE_COMPILE_FLAGS "-fintelfpga") + +# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation +set(HARDWARE_LINK_FLAGS "-fintelfpga -Xshardware -Xsboard=${_FPGA_BOARD} ${USER_HARDWARE_FLAGS}") + +set(EMULATOR_COMPILE_FLAGS "-fintelfpga -DFPGA_EMULATOR") +set(EMULATOR_LINK_FLAGS "-fintelfpga") + +# fpga emulator +if(WIN32) + set(WIN_EMULATOR_TARGET ${EMULATOR_TARGET}.exe) + add_custom_target(fpga_emu DEPENDS ${WIN_EMULATOR_TARGET}) + separate_arguments(WIN_EMULATOR_COMPILE_FLAGS WINDOWS_COMMAND "${EMULATOR_COMPILE_FLAGS}") + add_custom_command(OUTPUT ${WIN_EMULATOR_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} ${WIN_EMULATOR_COMPILE_FLAGS} /GX ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${WIN_EMULATOR_TARGET} + DEPENDS ${SOURCE_FILE}) +else() + add_executable(${EMULATOR_TARGET} ${SOURCE_FILE}) + add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS ${EMULATOR_COMPILE_FLAGS}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS ${EMULATOR_LINK_FLAGS}) +endif() + +# fpga +if(WIN32) + add_custom_target(fpga + COMMAND echo "FPGA hardware flow is not supported in Windows") +else() + add_executable(${FPGA_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILE}) + add_custom_target(fpga DEPENDS ${FPGA_TARGET}) + set_target_properties(${FPGA_TARGET} PROPERTIES COMPILE_FLAGS ${HARDWARE_COMPILE_FLAGS}) + set_target_properties(${FPGA_TARGET} PROPERTIES LINK_FLAGS ${HARDWARE_LINK_FLAGS}) +endif() + +# generate report +if(WIN32) + set(DEVICE_OBJ_FILE ${TARGET_NAME}_report.a) + add_custom_target(report DEPENDS ${DEVICE_OBJ_FILE}) + + separate_arguments(HARDWARE_LINK_FLAGS_LIST WINDOWS_COMMAND "${HARDWARE_LINK_FLAGS}") + add_custom_command(OUTPUT ${DEVICE_OBJ_FILE} + COMMAND ${CMAKE_CXX_COMPILER} /EHsc ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link 
${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${DEVICE_OBJ_FILE} + DEPENDS ${SOURCE_FILE}) + +else() + set(DEVICE_OBJ_FILE ${TARGET_NAME}_report.a) + add_custom_target(report DEPENDS ${DEVICE_OBJ_FILE}) + + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} ${SOURCE_FILE} COPYONLY) + + separate_arguments(HARDWARE_LINK_FLAGS_LIST UNIX_COMMAND "${HARDWARE_LINK_FLAGS}") + add_custom_command(OUTPUT ${DEVICE_OBJ_FILE} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link ${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${DEVICE_OBJ_FILE} + DEPENDS ${SOURCE_FILE}) +endif() + +# run +add_custom_target(run + COMMAND ../${TARGET_NAME}.fpga_emu + DEPENDS ${TARGET_NAME}.fpga_emu) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/src/build.ninja b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/src/build.ninja new file mode 100755 index 0000000000..f13a484a51 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/src/build.ninja @@ -0,0 +1,30 @@ +source_file = triangular_loop.cpp +target_name = triangular_loop + +emulator_target = ${target_name}.fpga_emu.exe +report_target = ${target_name}_report.a +report_target_s10_pac = ${target_name}_s10_pac_report.a + +hardware_flags = -fintelfpga -Xshardware +emulator_flags = -fintelfpga -DFPGA_EMULATOR + +rule build_fpga_emu + command = dpcpp /GX ${emulator_flags} $in -o $out + +rule gen_report + command = dpcpp /GX ${hardware_flags} -Xsboard=intel_a10gx_pac:pac_a10 -fsycl-link $in -o $out + +rule gen_report_s10_pac + command = dpcpp /GX ${hardware_flags} -Xsboard=intel_s10sx_pac:pac_s10 -fsycl-link $in -o $out + +# FPGA emulator +build fpga_emu: phony ${emulator_target} +build ${emulator_target}: build_fpga_emu ${source_file} + +# report +build report: phony ${report_target} +build ${report_target}: gen_report ${source_file} + +# report (S10 PAC) +build report_s10_pac: phony 
${report_target_s10_pac} +build ${report_target_s10_pac}: gen_report_s10_pac ${source_file} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/src/triangular_loop.cpp b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/src/triangular_loop.cpp new file mode 100755 index 0000000000..d3a5386bd6 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/src/triangular_loop.cpp @@ -0,0 +1,255 @@ +//============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +#include +#include +#include +#include "dpc_common.hpp" + +using namespace sycl; + +// Seed for randomizing data inputs +constexpr int kInitSeed = 42; + +// This tutorial runs twice to show the impact with +// and without the optimization. +constexpr int kNumRuns = 2; + +// number of nanoseconds in a second +constexpr double kNs = 1000000000.0; + +// Number of inputs. Don't set this too large, otherwise +// computation of the reference solution will take a long time on +// the host (the time is proportional to kSize^2) +constexpr int kSize = 8 * 1024; + +// >=1. Minimum number of iterations of the inner loop that must be +// executed in the optimized implementation. Set this approximately +// equal to the ii of inner loop in the unoptimized implementation. +constexpr int kM = 50; + +// Returns the smaller of X and Y; a constexpr function, so each argument is evaluated once +constexpr int Min(int X, int Y) { return (((X) < (Y)) ? (X) : (Y)); }; + +// Forward declaration of kernel +class Task; + +// This method represents the operation you perform on the loop-carried variable +// in the triangular loop (e.g. a dot product or something that may take many +// cycles to complete). +int SomethingComplicated(int x) { return (int)sycl::sqrt((float)x); } + +// This kernel function implements two data paths: with and without the +// optimization. 
'optimize' specifies which path to take. +void TriangularLoop(std::unique_ptr& q, buffer& input_buf, + buffer& output_buf, uint32_t n, event& e, + bool optimize) { + // Enqueue kernel + e = q->submit([&](handler& h) { + // Get accessors to the SYCL buffers + auto input = input_buf.get_access(h); + auto output = output_buf.get_access(h); + + h.single_task([=]() [[intel::kernel_args_restrict]] { + // See README for description of the loop_bound calculation. + const int real_iterations = (n * (n + 1) / 2 - 1); + const int extra_dummy_iterations = (kM - 2) * (kM - 1) / 2; + const int loop_bound = real_iterations + extra_dummy_iterations; + + // Local memory for the buffer to be operated on + uint32_t local_buf[kSize]; + + // Read the input_buf from global mem and load it into the local mem + for (uint32_t i = 0; i < kSize; i++) { + local_buf[i] = input[i]; + } + + // Perform the triangular loop computation + + if (!optimize) { // Unoptimized loop. + + for (int x = 0; x < n; x++) { + for (int y = x + 1; y < n; y++) { + local_buf[y] = local_buf[y] + SomethingComplicated(local_buf[x]); + } + } + + } else { // Optimized loop. + + // Indices to track the execution inside the single, merged loop. + int x = 0, y = 1; + + // Specify that the minimum dependence-distance of loop-carried + // variables is kM iterations. We ensure this is true by modifying the y + // index such that a minimum of kM iterations are always executed. + [[intelfpga::ivdep(kM)]] for (int i = 0; i < loop_bound; i++) { + // Determine if this iteration is a dummy iteration or a real + // iteration in which the computation should be performed. + bool compute = y > x; + // Perform the computation if needed. + if (compute) { + local_buf[y] = local_buf[y] + SomethingComplicated(local_buf[x]); + } + // Figure out the next value for the indices. + y++; + + // If we've hit the end of a row, advance x and restart y such that a minimum of kM + // iterations are executed (iterations with y <= x are the dummy ones). 
+ if (y == n) { + x++; + y = Min(n - kM, x + 1); + } + } + } + + // Write the output to global mem + for (uint32_t i = 0; i < kSize; i++) { + output[i] = local_buf[i]; + } + }); + }); + +} + +int main() { + + // Host and kernel profiling + event e; + ulong t1_kernel, t2_kernel; + double time_kernel; +// Create queue, get platform and device +#if defined(FPGA_EMULATOR) + intel::fpga_emulator_selector device_selector; + std::cout << "\nEmulator output does not demonstrate true hardware " + "performance. The design may need to run on actual hardware " + "to observe the performance benefit of the optimization " + "exemplified in this tutorial.\n\n"; +#else + intel::fpga_selector device_selector; +#endif + + try { + auto prop_list = + property_list{property::queue::enable_profiling()}; + + std::unique_ptr q; + q.reset(new queue(device_selector, dpc_common::exception_handler, prop_list)); + + platform platform = q->get_context().get_platform(); + device device = q->get_device(); + std::cout << "Platform name: " + << platform.get_info().c_str() << "\n"; + std::cout << "Device name: " + << device.get_info().c_str() << "\n\n\n"; + + // Create input and output buffers + auto input_buf = buffer(range<1>(kSize)); + auto output_buf = buffer(range<1>(kSize)); + + srand(kInitSeed); + + // Compute the reference solution + uint32_t gold[kSize]; + + { + // Get host-side accessors to the SYCL buffers. + auto input_host = input_buf.get_access(); + + // Initialize random input + for (int i = 0; i < kSize; ++i) { + input_host[i] = rand() % 256; + } + + for (int i = 0; i < kSize; ++i) { + gold[i] = input_host[i]; + } + } + + // Host accessor now out-of-scope and is destructed. This is required in + // order to unblock the kernel's subsequent accessor to the same buffer. 
+ + for (int x = 0; x < kSize; x++) { + for (int y = x + 1; y < kSize; y++) { + gold[y] += SomethingComplicated(gold[x]); + } + } + + std::cout << "Length of input array: " << kSize << "\n\n"; + + for (int i = 0; i < kNumRuns; i++) { + switch (i) { + case 0: { + std::cout + << "Beginning run without triangular loop optimization.\n\n"; + TriangularLoop(q, input_buf, output_buf, kSize, e, false); + break; + } + case 1: { + std::cout << "Beginning run with triangular loop optimization.\n\n"; + TriangularLoop(q, input_buf, output_buf, kSize, e, true); + break; + } + default: { + TriangularLoop(q, input_buf, output_buf, kSize, e, false); + } + } + + // Wait for kernels to finish + q->wait(); + + t1_kernel = e.get_profiling_info(); + t2_kernel = e.get_profiling_info(); + time_kernel = (t2_kernel - t1_kernel) / kNs; + + // Get accessor to output buffer. Accessing the buffer at this point in + // the code will block on kernel completion. + auto output_host = output_buf.get_access(); + + // Verify output and print pass/fail + bool passed = true; + int num_errors = 0; + for (int b = 0; b < kSize; b++) { + if (num_errors < 10 && output_host[b] != gold[b]) { + passed = false; + std::cout << " Mismatch at element " << b << ". expected " << gold[b] + << ")\n"; + num_errors++; + } + } + + if (passed) { + std::cout << "Verification PASSED\n\n"; + + // Report host execution time and throughput + std::cout.setf(std::ios::fixed); + std::cout << "Execution time: " << time_kernel << " seconds\n"; + int num_iterations = + kSize * (kSize + 1) / 2 - + 1; // One piece of data is processed on each iteration. This + // formula is taken from the loop_bound calculation. + double N_MB = (sizeof(uint32_t) * num_iterations) / + (1024 * 1024); // Amount of data processed, in mB + std::cout << "Throughput " << (i == 0 ? 
"without" : "with") + << " optimization: " << N_MB / time_kernel << " MB/s\n\n"; + } else { + std::cout << "Verification FAILED\n"; + return 1; + } + } + } catch (sycl::exception const& e) { + // Catches exceptions in the host code + std::cout << "Caught a SYCL host exception:\n" << e.what() << "\n"; + + // Most likely the runtime couldn't find FPGA hardware! + if (e.get_cl_code() == CL_DEVICE_NOT_FOUND) { + std::cout << "If you are targeting an FPGA, please ensure that your " + "system has a correctly configured FPGA board.\n"; + std::cout << "If you are targeting the FPGA emulator, compile with " + "-DFPGA_EMULATOR.\n"; + } + std::terminate(); + } + return 0; +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/triangular_loop.sln b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/triangular_loop.sln new file mode 100755 index 0000000000..dba49d0132 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/triangular_loop.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.28307.705 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "triangular_loop", "triangular_loop.vcxproj", "{B9324A38-DD67-4220-9EC3-42A8ACBDC4F5}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {B9324A38-DD67-4220-9EC3-42A8ACBDC4F5}.Debug|x64.ActiveCfg = Debug|x64 + {B9324A38-DD67-4220-9EC3-42A8ACBDC4F5}.Debug|x64.Build.0 = Debug|x64 + {B9324A38-DD67-4220-9EC3-42A8ACBDC4F5}.Release|x64.ActiveCfg = Release|x64 + {B9324A38-DD67-4220-9EC3-42A8ACBDC4F5}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + 
GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {AF287516-09DE-4A70-AF44-3C4F5D850105} + EndGlobalSection +EndGlobal diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/triangular_loop.vcxproj b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/triangular_loop.vcxproj new file mode 100755 index 0000000000..6d5fc1777b --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/triangular_loop/triangular_loop.vcxproj @@ -0,0 +1,160 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + + + + + + 15.0 + {b9324a38-dd67-4220-9ec3-42a8acbdc4f5} + Win32Proj + triangular_loop + $(WindowsSDKVersion.Replace("\","")) + + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + + + + + + + + + + + + + + + + + + + true + + + true + + + false + + + false + + + + Use + Level3 + Disabled + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + Disabled + true + true + pch.h + true + -DFPGA_EMULATOR %(AdditionalOptions) + $(IntDir)triagular_loop.obj + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + true + -DFPGA_EMULATOR %(AdditionalOptions) + $(IntDir)triagular_loop.obj + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + + \ No newline at end of file diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/CMakeLists.txt new file mode 
100755 index 0000000000..125d32c072 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/CMakeLists.txt @@ -0,0 +1,11 @@ +set(CMAKE_CXX_COMPILER "dpcpp") + +cmake_minimum_required (VERSION 2.8) + +project(MemoryAttributesOverview) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +add_subdirectory (src) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/License.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/License.txt new file mode 100755 index 0000000000..e63c6e13dc --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/License.txt @@ -0,0 +1,7 @@ +Copyright Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/README.md b/DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/README.md new file mode 100755 index 0000000000..799c4bcf8b --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/README.md @@ -0,0 +1,182 @@ + +# Avoiding Aliasing of Kernel Arguments +This tutorial explains the `kernel_args_restrict` attribute and its effect on the performance of FPGA kernels. + +***Documentation***: The [oneAPI DPC++ FPGA Optimization Guide](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide) provides comprehensive instructions for targeting FPGAs through DPC++. The [oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide) is a general resource for target-independent DPC++ programming. + +| Optimized for | Description +--- |--- +| OS | Linux* Ubuntu* 18.04; Windows* 10 +| Hardware | Intel® Programmable Acceleration Card (PAC) with Intel Arria® 10 GX FPGA;
Intel® Programmable Acceleration Card (PAC) with Intel Stratix® 10 SX FPGA +| Software | Intel® oneAPI DPC++ Compiler (Beta)
Intel® FPGA Add-On for oneAPI Base Toolkit +| What you will learn | The problem of *pointer aliasing* and its impact on compiler optimizations
The behavior of the `kernel_args_restrict` attribute and when to use it on your kernel
The effect this attribute can have on your kernel's performance on FPGA +| Time to complete | 20 minutes + +_Notice: Limited support in Windows*; compiling for FPGA hardware is not supported in Windows*_ + +## Purpose +Due to pointer aliasing, the compiler must be conservative about optimizations that reorder, parallelize or overlap operations that could alias. This tutorial demonstrates the use of the DPC++ `[[intel::kernel_args_restrict]]` kernel attribute, which should be applied any time you can guarantee that kernel arguments do not alias. This attribute enables more aggressive compiler optimizations and often improves kernel performance on FPGA. + + +### What Is Pointer Aliasing? +Pointer aliasing occurs when the same memory location can be accessed using different *names* (i.e. variables). For example, consider the code below. Here, the variable `pi` can be changed in one of three ways: `pi=3.14159`, `*a=3.14159` or `*b=3.14159`. In general, the compiler has to be conservative about which accesses may alias to each other and avoid making optimizations that reorder and/or parallelize operations. + +```c++ +float pi = 3.14; +float *a = &pi; +float *b = a; +``` +### Pointer Aliasing of Arguments +Consider the function illustrated below. Though the intention of the code is clear to the reader, the compiler cannot guarantee that `in` does not alias with `out`. Imagine a degenerate case where the function was called like this: `myCopy(ptr, ptr+1, 10)`. This would cause `in[i+1]` and `out[i]` to alias to the same address, for all `i` from 0 to 8. +```c++ +void myCopy(int *in, int *out, size_t size) { + for(size_t i = 0; i < size; i++) { + out[i] = in[i]; + } +} +``` +This possibility of aliasing forces the compiler to be conservative. Without more information from the developer, it cannot make any optimizations which overlap, vectorize or reorder the assignment operations. 
Doing so would result in functionally incorrect behavior if the compiled function is called with aliasing pointers. + +If this code is compiled to FPGA, the performance penalty of this conservatism is severe. The loop in `myCopy` cannot be pipelined, because the next iteration of the loop cannot begin until the current iteration has completed. + +### A Promise to the Compiler +The developer often knows that pointer arguments will never alias in practice, as with the `myCopy` function. In your DPC++ program, you can use the `[[intel::kernel_args_restrict]]` attribute to inform the compiler that none of a kernel's arguments will alias with any other, thereby enabling more aggressive optimizations. If the non-aliasing assumption is violated at runtime, the result will be undefined behavior. + +C and OpenCL programmers may recognize this concept as the `restrict` keyword. + +### Tutorial Code Description +In this tutorial, we will show how to use the `kernel_args_restrict` attribute for your kernel and the effect it has on performance. We show two kernels that perform the same function; one with the `[[intel::kernel_args_restrict]]` attribute applied to it and the other without. The function of the kernel is simple: copy the contents of one buffer to another. We will analyze the effect of the `[[intel::kernel_args_restrict]]` attribute on the performance of the kernel by analyzing the loop initiation interval (II) in the reports and the latency of the kernel on actual hardware. + +## Key Concepts +* The problem of *pointer aliasing* and its impact on compiler optimizations +* The behavior of the `kernel_args_restrict` attribute and when to use it on your kernel +* The effect this attribute can have on your kernel's performance on FPGA + +## License +This code sample is licensed under the MIT license. + +## Building the `kernel_args_restrict` Tutorial + +### Include Files +The included header `dpc_common.hpp` is located at `%ONEAPI_ROOT%\dev-utilities\latest\include` on your development system. 
+ +### Running Samples in DevCloud +If running a sample in the Intel DevCloud, remember that you must specify the compute node (fpga_compile or fpga_runtime) as well as whether to run in batch or interactive mode. For more information see the Intel® oneAPI Base Toolkit Get Started Guide ([https://devcloud.intel.com/oneapi/get-started/base-toolkit/](https://devcloud.intel.com/oneapi/get-started/base-toolkit/)). + +When compiling for FPGA hardware, it is recommended to increase the job timeout to 12h. + +### On a Linux* System + +1. Generate the `Makefile` by running `cmake`. + ``` + mkdir build + cd build + ``` + To compile for the Intel® PAC with Intel Arria® 10 GX FPGA, run `cmake` using the command: + ``` + cmake .. + ``` + Alternatively, to compile for the Intel® PAC with Intel Stratix® 10 SX FPGA, run `cmake` using the command: + + ``` + cmake .. -DFPGA_BOARD=intel_s10sx_pac:pac_s10 + ``` + +2. Compile the design through the generated `Makefile`. The following build targets are provided, matching the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + make fpga_emu + ``` + * Generate the optimization report: + ``` + make report + ``` + * Compile for FPGA hardware (longer compile time, targets FPGA device): + ``` + make fpga + ``` +3. (Optional) As the above hardware compile may take several hours to complete, an Intel® PAC with Intel Arria® 10 GX FPGA precompiled binary can be downloaded here. + +### On a Windows* System +Note: `cmake` is not yet supported on Windows. A build.ninja file is provided instead. + +1. Enter the source file directory. + ``` + cd src + ``` + +2. Compile the design. 
The following build targets are provided, matching the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + ninja fpga_emu + ``` + + * Generate the optimization report: + + ``` + ninja report + ``` + If you are targeting Intel® PAC with Intel Stratix® 10 SX FPGA, instead use: + ``` + ninja report_s10_pac + ``` + * Compiling for FPGA hardware is not yet supported on Windows. + + ### In Third-Party Integrated Development Environments (IDEs) + +You can compile and run this tutorial in the Eclipse* IDE (in Linux*) and the Visual Studio* IDE (in Windows*). For instructions, refer to the following link: [Intel® oneAPI DPC++ FPGA Workflows on Third-Party IDEs](https://software.intel.com/en-us/articles/intel-oneapi-dpcpp-fpga-workflow-on-ide) + +## Examining the Reports +Locate `report.html` in the `kernel_args_restrict_report.prj/reports/` or `kernel_args_restrict_s10_pac_report.prj/reports/` directory. Open the report in any of Chrome*, Firefox*, Edge*, or Internet Explorer*. + +Navigate to the *Loop Analysis* report (*Throughput Analysis* > *Loop Analysis*). In the *Loop List* pane you should see two kernels: one is the kernel without the attribute applied (*KernelArgsNoRestrict*) and the other with the attribute applied (*KernelArgsRestrict*). Each kernel has a single for-loop, which appears in the *Loop List* pane. Click on the loop under each kernel to see how it was optimized by the compiler. + +Compare the loop initiation interval (II) between the two kernels. Notice that the loop in the *KernelArgsNoRestrict* kernel has a large estimated II, while the loop in the *KernelArgsRestrict* kernel has an estimated II of ~1. These IIs are estimates because the latency of global memory accesses varies with runtime conditions. + +For the *KernelArgsNoRestrict* kernel, the compiler assumed that the kernel arguments can alias each other. 
Since `out[i]` and `in[i+1]` could be the same memory location, the compiler cannot overlap the iteration of the loop performing `out[i] = in[i]` with the next iteration of the loop performing `out[i+1] = in[i+1]` (and likewise for iterations `in[i+2]`, `in[i+3]`, ...). This results in an II equal to the latency of the global memory read of `in[i]` plus the latency of the global memory write to `out[i]`. + +We can confirm this by looking at the details of the loop. Click on the *KernelArgsNoRestrict* kernel in the *Loop List* pane and then click on the loop in the *Loop Analysis* pane. Now consider the *Details* pane below. You should see something like: + +- *Compiler failed to schedule this loop with smaller II due to memory dependency* + - *From: Load Operation (kernel_args_restrict.cpp: 74 > accessor.hpp: 945)* + - *To: Store Operation (kernel_args_restrict.cpp: 74)* +- *Most critical loop feedback path during scheduling:* + - *144.00 clock cycles Load Operation (kernel_args_restrict.cpp: 74 > accessor.hpp: 945)* + - *42.00 clock cycles Store Operation (kernel_args_restrict.cpp: 74)* + +The first bullet (and its sub-bullets) tells you that there is a memory dependency between the load and store operations in the loop. This is the conservative pointer aliasing memory dependency described earlier. The second bullet shows you the estimated latencies for the load and store operations (note that these are board-dependent). The sum of these two latencies (plus 1) is the II of the loop. + +Next, look at the loop details of the *KernelArgsRestrict* kernel. You will notice that the *Details* pane doesn't show a memory dependency. The usage of the `[[intel::kernel_args_restrict]]` attribute allowed the compiler to schedule a new iteration of the for-loop every cycle since it knows that accesses to `in` and `out` will never alias. + + +## Running the Sample + + 1. 
Run the sample on the FPGA emulator (the kernel executes on the CPU): + ``` + ./kernel_args_restrict.fpga_emu (Linux) + kernel_args_restrict.fpga_emu.exe (Windows) + ``` +2. Run the sample on the FPGA device: + ``` + ./kernel_args_restrict.fpga (Linux) + ``` + +### Example of Output +``` +Kernel throughput without attribute: 8.06761 MB/s +Kernel throughput with attribute: 766.873 MB/s +PASSED +``` + +### Discussion of Results + +The throughput observed when running the kernels with and without the `kernel_args_restrict` attribute should reflect the difference in loop II seen in the reports. The ratios will not exactly match because the loop IIs are estimates. An example ratio (compiled and run on the Intel® Programmable Acceleration Card (PAC) with Intel Arria® 10 GX FPGA) is shown. + +Attribute used? | II | Kernel Throughput (MB/s) +------------- | ------------- | -------- +No | ~187 | 8 +Yes | ~1 | 767 + +Note that this performance difference will be apparent only when running on FPGA hardware. The emulator, while useful for verifying functionality, will generally not reflect differences in performance. 
diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/kernel_args_restrict.sln b/DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/kernel_args_restrict.sln new file mode 100755 index 0000000000..7fd1d9a291 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/kernel_args_restrict.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.28307.705 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "kernel_args_restrict", "kernel_args_restrict.vcxproj", "{D6A634E7-9F2B-46C2-A21C-2402F631A55A}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {D6A634E7-9F2B-46C2-A21C-2402F631A55A}.Debug|x64.ActiveCfg = Debug|x64 + {D6A634E7-9F2B-46C2-A21C-2402F631A55A}.Debug|x64.Build.0 = Debug|x64 + {D6A634E7-9F2B-46C2-A21C-2402F631A55A}.Release|x64.ActiveCfg = Release|x64 + {D6A634E7-9F2B-46C2-A21C-2402F631A55A}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {4AC13DD2-5B0F-4051-93BF-85AEAF6E50C9} + EndGlobalSection +EndGlobal diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/kernel_args_restrict.vcxproj b/DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/kernel_args_restrict.vcxproj new file mode 100755 index 0000000000..7b0b629cf5 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/kernel_args_restrict.vcxproj @@ -0,0 +1,155 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + + + 15.0 + {d6a634e7-9f2b-46c2-a21c-2402f631a55a} + Win32Proj + kernel_args_restricts + 
$(WindowsSDKVersion.Replace("\","")) + + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + + + + + + + + + + + + + + + + + + + true + + + true + + + false + + + false + + + + Use + Level3 + Disabled + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + Disabled + true + true + pch.h + true + -DFPGA_EMULATOR %(AdditionalOptions) + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + true + -DFPGA_EMULATOR %(AdditionalOptions) + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + + diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/sample.json b/DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/sample.json new file mode 100755 index 0000000000..45a85e30a9 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/sample.json @@ -0,0 +1,51 @@ +{ + "guid": "86066897-498B-41C5-BFA3-A03D3CAE2503", + "name": "Avoiding Aliasing of Kernel Arguments", + "categories": ["Toolkit/Intel® oneAPI Base Toolkit/FPGA/Tutorials"], + "description": "Explain the `kernel_args_restrict` attribute and its effect on the performance of FPGA kernels.", + "toolchain": ["dpcpp"], + "os": ["linux", "windows"], + "targetDevice": ["FPGA"], + "builder": ["ide", "cmake"], + "languages": [{"cpp":{}}], + "ciTests": { + "linux": [ + { + "id": "fpga_emu", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make fpga_emu", + "./kernel_args_restrict.fpga_emu" + 
] + }, + { + "id": "report", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make report" + ] + } + ], + "windows": [ + { + "id": "fpga_emu", + "steps": [ + "cd src", + "ninja fpga_emu", + "kernel_args_restrict.fpga_emu.exe" + ] + }, + { + "id": "report", + "steps": [ + "cd src", + "ninja report" + ] + } + ] + } +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/src/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/src/CMakeLists.txt new file mode 100755 index 0000000000..0a4f13cefc --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/src/CMakeLists.txt @@ -0,0 +1,94 @@ +set(SOURCE_FILE kernel_args_restrict.cpp) +set(TARGET_NAME kernel_args_restrict) +set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu) +set(FPGA_TARGET ${TARGET_NAME}.fpga) + +# Intel supported FPGA Boards and their names +set(A10_PAC_BOARD_NAME "intel_a10gx_pac:pac_a10") +set(S10_PAC_BOARD_NAME "intel_s10sx_pac:pac_s10") + +# Assume target is the Intel(R) PAC with Intel Arria(R) 10 GX FPGA +SET(_FPGA_BOARD ${A10_PAC_BOARD_NAME}) + +# Check if target is the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA +IF (NOT DEFINED FPGA_BOARD) + MESSAGE(STATUS "\tFPGA_BOARD was not specified. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. 
Please refer to the README for more information on how to run the design on the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${A10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${S10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Stratix(R) 10 SX FPGA.") + SET(_FPGA_BOARD ${S10_PAC_BOARD_NAME}) + +ELSE() + MESSAGE(STATUS "\tAn invalid board name was passed in using the FPGA_BOARD flag. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. Please refer to the README for the list of valid board names.") +ENDIF() + +set(HARDWARE_COMPILE_FLAGS "-fintelfpga") + +# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation +set(HARDWARE_LINK_FLAGS "-fintelfpga -Xshardware -Xsboard=${_FPGA_BOARD} ${USER_HARDWARE_FLAGS}") + +set(EMULATOR_COMPILE_FLAGS "-fintelfpga -DFPGA_EMULATOR") +set(EMULATOR_LINK_FLAGS "-fintelfpga") + +# fpga emulator +if(WIN32) + set(WIN_EMULATOR_TARGET ${EMULATOR_TARGET}.exe) + add_custom_target(fpga_emu DEPENDS ${WIN_EMULATOR_TARGET}) + separate_arguments(WIN_EMULATOR_COMPILE_FLAGS WINDOWS_COMMAND "${EMULATOR_COMPILE_FLAGS}") + add_custom_command(OUTPUT ${WIN_EMULATOR_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} ${WIN_EMULATOR_COMPILE_FLAGS} /GX ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${WIN_EMULATOR_TARGET} + DEPENDS ${SOURCE_FILE}) + +else() + add_executable(${EMULATOR_TARGET} ${SOURCE_FILE}) + add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS ${EMULATOR_COMPILE_FLAGS}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS ${EMULATOR_LINK_FLAGS}) +endif() + +# fpga +if(WIN32) + add_custom_target(fpga + COMMAND 
echo "FPGA hardware flow is not supported in Windows") +else() + add_executable(${FPGA_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILE}) + add_custom_target(fpga DEPENDS ${FPGA_TARGET}) + + set_target_properties(${FPGA_TARGET} PROPERTIES COMPILE_FLAGS "${HARDWARE_COMPILE_FLAGS}") + set_target_properties(${FPGA_TARGET} PROPERTIES LINK_FLAGS ${HARDWARE_LINK_FLAGS}) +endif() + +# report +if(WIN32) + set(REPORT ${TARGET_NAME}_report.a) + + add_custom_target(report DEPENDS ${REPORT}) + + separate_arguments(HARDWARE_LINK_FLAGS_LIST WINDOWS_COMMAND "${HARDWARE_LINK_FLAGS}") + + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} ${CMAKE_BINARY_DIR}/${TARGET_NAME}/${SOURCE_FILE} COPYONLY) + + add_custom_command(OUTPUT ${REPORT} + COMMAND ${CMAKE_CXX_COMPILER} /EHsc ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link ${CMAKE_BINARY_DIR}/${TARGET_NAME}/${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${REPORT} + DEPENDS ${SOURCE_FILE}) + +else() + set(REPORT ${TARGET_NAME}_report.a) + + add_custom_target(report DEPENDS ${REPORT}) + + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} ${SOURCE_FILE} COPYONLY) + + separate_arguments(HARDWARE_LINK_FLAGS_LIST UNIX_COMMAND "${HARDWARE_LINK_FLAGS}") + add_custom_command(OUTPUT ${REPORT} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link ${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${REPORT} + DEPENDS ${SOURCE_FILE}) +endif() + +# run +add_custom_target(run + COMMAND ../${TARGET_NAME}.fpga_emu + DEPENDS ${TARGET_NAME}.fpga_emu) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/src/build.ninja b/DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/src/build.ninja new file mode 100755 index 0000000000..5213ba0f55 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/src/build.ninja @@ -0,0 +1,30 @@ +source_file = kernel_args_restrict.cpp +target_name = kernel_args_restrict + +emulator_target = ${target_name}.fpga_emu.exe 
+report_target = ${target_name}_report.a +report_target_s10_pac = ${target_name}_s10_pac_report.a + +hardware_flags = -fintelfpga -Xshardware +emulator_flags = -fintelfpga -DFPGA_EMULATOR + +rule build_fpga_emu + command = dpcpp /GX ${emulator_flags} $in -o $out + +rule gen_report + command = dpcpp /GX ${hardware_flags} -Xsboard=intel_a10gx_pac:pac_a10 -fsycl-link $in -o $out + +rule gen_report_s10_pac + command = dpcpp /GX ${hardware_flags} -Xsboard=intel_s10sx_pac:pac_s10 -fsycl-link $in -o $out + +# FPGA emulator +build fpga_emu: phony ${emulator_target} +build ${emulator_target}: build_fpga_emu ${source_file} + +# report +build report: phony ${report_target} +build ${report_target}: gen_report ${source_file} + +# report (S10 PAC) +build report_s10_pac: phony ${report_target_s10_pac} +build ${report_target_s10_pac}: gen_report_s10_pac ${source_file} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/src/kernel_args_restrict.cpp b/DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/src/kernel_args_restrict.cpp new file mode 100755 index 0000000000..550f122ece --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/kernel_args_restrict/src/kernel_args_restrict.cpp @@ -0,0 +1,134 @@ +//============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +#include +#include +#include +#include "dpc_common.hpp" + +using namespace sycl; + +// problem input size +constexpr size_t kInSize = 1000000; +constexpr double kInputMB = (kInSize * sizeof(int)) / (1024 * 1024); +constexpr int kRandMax = 7777; + +// Forward declare the kernel names +// (This will become unnecessary in a future compiler version.) 
+class KernelArgsRestrict; +class KernelArgsNoRestrict; + +// Return the execution time of the event, in seconds +// (difference of the two profiling values read from the event, scaled by 1e-9). +// NOTE(review): the template arguments of get_profiling_info are not visible in this text; presumably command start/end - confirm against the original source. +double GetExecutionTime(const event &e) { + double start_k = e.get_profiling_info(); + double end_k = e.get_profiling_info(); + double kernel_time = (end_k - start_k) * 1e-9; // ns to s + return kernel_time; +} + +// Copies the first `size` elements of `in` into `nr_out` and into `r_out` using two single_task kernels +// submitted to one profiling-enabled queue; only the second kernel lambda carries [[intel::kernel_args_restrict]]. +// Prints the throughput of each kernel (kInputMB divided by its execution time). +// On a SYCL exception it prints a diagnostic and calls std::terminate(). +void RunKernels(size_t size, std::vector &in, std::vector &nr_out, + std::vector &r_out) { + + // The device selector is chosen at compile time: emulator when FPGA_EMULATOR is defined, FPGA hardware otherwise +#if defined(FPGA_EMULATOR) + intel::fpga_emulator_selector device_selector; +#else + intel::fpga_selector device_selector; +#endif + + try { + // create the SYCL device queue + queue q(device_selector, dpc_common::exception_handler, + property::queue::enable_profiling{}); + + buffer in_buf(in); + // Use verbose SYCL 1.2 syntax for the output buffer. + // (This will become unnecessary in a future compiler version.) + buffer nr_out_buf(nr_out.data(), size); + buffer r_out_buf(r_out.data(), size); + + // submit the task that DOES NOT apply the kernel_args_restrict attribute + auto e_nr = q.submit([&](handler &h) { + auto in_acc = in_buf.get_access(h); + auto out_acc = nr_out_buf.get_access(h); + + h.single_task([=]() { + for (size_t i = 0; i < size; i++) { + out_acc[i] = in_acc[i]; + } + }); + }); + + // submit the task that DOES apply the kernel_args_restrict attribute + auto e_r = q.submit([&](handler &h) { + auto in_acc = in_buf.get_access(h); + auto out_acc = r_out_buf.get_access(h); + + h.single_task([=]() [[intel::kernel_args_restrict]] { + for (size_t i = 0; i < size; i++) { + out_acc[i] = in_acc[i]; + } + }); + }); + + // measure the execution time of each kernel + double nr_time = GetExecutionTime(e_nr); + double r_time = GetExecutionTime(e_r); + + std::cout << "Kernel throughput without attribute: " << (kInputMB / nr_time) + << " MB/s\n"; + std::cout << "Kernel throughput with attribute: " << (kInputMB / r_time) + << " MB/s\n"; + + } catch (sycl::exception const &e) { + // Catches exceptions in the host code + std::cout << "Caught a SYCL host 
exception:\n" << e.what() << "\n"; + + // Most likely the runtime couldn't find FPGA hardware! + if (e.get_cl_code() == CL_DEVICE_NOT_FOUND) { + std::cout << "If you are targeting an FPGA, please ensure that your " + "system has a correctly configured FPGA board.\n"; + std::cout << "If you are targeting the FPGA emulator, compile with " + "-DFPGA_EMULATOR.\n"; + } + // The sample does not try to recover: any SYCL exception ends the program here + std::terminate(); + } +} + +// Fills the input with pseudo-random values from a fixed seed, runs both kernels, +// and compares each output vector with the input element by element. +// Returns 1 at the first mismatch and 0 when both outputs match. +int main() { + // seed the random number generator + srand(0); + + // input/output data + std::vector in(kInSize); + std::vector nr_out(kInSize), r_out(kInSize); + + // generate some random input data + for (size_t i = 0; i < kInSize; i++) { + in[i] = rand() % kRandMax; + } + + // Run the kernels + RunKernels(kInSize, in, nr_out, r_out); + + // validate the results + for (size_t i = 0; i < kInSize; i++) { + if (in[i] != nr_out[i]) { + std::cout << "FAILED: mismatch at entry " << i + << " of 'KernelArgsNoRestrict' kernel output\n"; + return 1; + } + } + for (size_t i = 0; i < kInSize; i++) { + if (in[i] != r_out[i]) { + std::cout << "FAILED: mismatch at entry " << i + << " of 'KernelArgsRestrict' kernel output\n"; + return 1; + } + } + + std::cout << "PASSED\n"; + + return 0; +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/CMakeLists.txt new file mode 100755 index 0000000000..8ab3aa3653 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/CMakeLists.txt @@ -0,0 +1,11 @@ +set(CMAKE_CXX_COMPILER "dpcpp") + +cmake_minimum_required (VERSION 2.8) + +project(LoopCoalesce) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +add_subdirectory (src) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/License.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/License.txt new file mode 100755 
index 0000000000..e63c6e13dc --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/License.txt @@ -0,0 +1,7 @@ +Copyright Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/README.md b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/README.md new file mode 100755 index 0000000000..4b2530a96a --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/README.md @@ -0,0 +1,167 @@ + +# Coalescing Nested Loops +This FPGA tutorial demonstrates applying the `loop_coalesce` attribute to a nested loop in a task kernel to reduce the area overhead. + +***Documentation***: The [oneAPI DPC++ FPGA Optimization Guide](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide) provides comprehensive instructions for targeting FPGAs through DPC++. 
The [oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide) is a general resource for target-independent DPC++ programming. + +| Optimized for | Description +--- |--- +| OS | Linux* Ubuntu* 18.04; Windows* 10 +| Hardware | Intel® Programmable Acceleration Card (PAC) with Intel Arria® 10 GX FPGA;
Intel® Programmable Acceleration Card (PAC) with Intel Stratix® 10 SX FPGA +| Software | Intel® oneAPI DPC++ Compiler (Beta)
Intel® FPGA Add-On for oneAPI Base Toolkit +| What you will learn | What the `loop_coalesce` attribute does
How `loop_coalesce` attribute affects resource usage and loop throughput
How to apply the `loop_coalesce` attribute to loops in your program
Which loops make good candidates for coalescing +| Time to complete | 15 minutes + +_Notice: Limited support in Windows*; compiling for FPGA hardware is not supported in Windows*_ + +## Purpose +The `loop_coalesce` attribute enables you to direct the compiler to combine nested loops into a single loop. The attribute `[[intelfpga::loop_coalesce(N)]]` takes an integer argument `N` that specifies how many nested loop levels you want the compiler to attempt to coalesce. + +**NOTE**: If you specify `[[intelfpga::loop_coalesce(1)]]` on nested loops, the compiler does not attempt to coalesce any of the nested loops. +### Example: Coalescing Two Loops + +``` +[[intelfpga::loop_coalesce(2)]] +for (int i = 0; i < N; i++) + for (int j = 0; j < M; j++) + sum[i][j] += i+j; +``` +The compiler coalesces the two loops together so that they execute as if they were a single loop written as follows: + +``` +int i = 0; +int j = 0; +while(i < N){ + sum[i][j] += i+j; + j++; + if (j == M){ + j = 0; + i++; + } +} +``` + +### Identifying Which Loops to Coalesce +Generally, coalescing loops can help reduce area usage by reducing the overhead needed for loop control. However, in some circumstances, coalescing loops can reduce kernel throughput. Scenarios where the `loop_coalesce` attribute can be applied to save area without a loss of throughput are those where: + + 1. The loops being coalesced have the same initiation interval (II). + 2. The exit condition computation for the resulting coalesced loop is not complicated. + +If the innermost coalesced loop has a very small trip count, `loop_coalesce` might actually improve throughput. + + +## Key Concepts +* Description of the `loop_coalesce` attribute +* How `loop_coalesce` attribute affects resource usage and loop throughput +* How to apply the `loop_coalesce` attribute to loops in your program +* Determining which loops make good candidates for coalescing + +## License +This code sample is licensed under MIT license. 
+ + +## Building the `loop_coalesce` Tutorial + +### Include Files +The included header `dpc_common.hpp` is located at `%ONEAPI_ROOT%\dev-utilities\latest\include` on your development system. + +### Running Samples in DevCloud +If running a sample in the Intel DevCloud, remember that you must specify the compute node (fpga_compile or fpga_runtime) as well as whether to run in batch or interactive mode. For more information see the Intel® oneAPI Base Toolkit Get Started Guide ([https://devcloud.intel.com/oneapi/get-started/base-toolkit/](https://devcloud.intel.com/oneapi/get-started/base-toolkit/)). + +When compiling for FPGA hardware, it is recommended to increase the job timeout to 12h. + +### On a Linux* System + +1. Generate the `Makefile` by running `cmake`. + ``` + mkdir build + cd build + ``` + To compile for the Intel® PAC with Intel Arria® 10 GX FPGA, run `cmake` using the command: + ``` + cmake .. + ``` + Alternatively, to compile for the Intel® PAC with Intel Stratix® 10 SX FPGA, run `cmake` using the command: + + ``` + cmake .. -DFPGA_BOARD=intel_s10sx_pac:pac_s10 + ``` + +2. Compile the design through the generated `Makefile`. The following build targets are provided, matching the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + make fpga_emu + ``` + * Generate the optimization report: + ``` + make report + ``` + * Compile for FPGA hardware (longer compile time, targets FPGA device): + ``` + make fpga + ``` +3. (Optional) As the above hardware compile may take several hours to complete, an Intel® PAC with Intel Arria® 10 GX FPGA precompiled binary can be downloaded here. + +### On a Windows* System +Note: `cmake` is not yet supported on Windows. A build.ninja file is provided instead. + +1. Enter the source file directory. + ``` + cd src + ``` + +2. Compile the design. 
The following build targets are provided, matching the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + ninja fpga_emu + ``` + + * Generate the optimization report: + + ``` + ninja report + ``` + If you are targeting Intel® PAC with Intel Stratix® 10 SX FPGA, instead use: + ``` + ninja report_s10_pac + ``` + * Compiling for FPGA hardware is not yet supported on Windows. + + ### In Third-Party Integrated Development Environments (IDEs) + +You can compile and run this tutorial in the Eclipse* IDE (in Linux*) and the Visual Studio* IDE (in Windows*). For instructions, refer to the following link: [Intel® oneAPI DPC++ FPGA Workflows on Third-Party IDEs](https://software.intel.com/en-us/articles/intel-oneapi-dpcpp-fpga-workflow-on-ide) + +## Examining the Reports +Locate `report.html` in the `loop_coalesce_report.prj/reports/` or `loop_coalesce_s10_pac_report.prj/reports/` directory. Open the report in any of Chrome*, Firefox*, Edge*, or Internet Explorer*. + +On the main report page, scroll down to the section titled `Compile Estimated Kernel Resource Utilization Summary`. Each kernel name ends in the loop_coalesce attribute argument used for that kernel, e.g., KernelCompute<2> uses a loop_coalesce argument of 2. You can verify that the number of registers, MLABs and DSPs used for each kernel decreases after nested loops are coalesced. + + +## Running the Sample + + 1. Run the sample on the FPGA emulator (the kernel executes on the CPU): + ``` + ./loop_coalesce.fpga_emu (Linux) + loop_coalesce.fpga_emu.exe (Windows) + ``` +2. 
Run the sample on the FPGA device: + ``` + ./loop_coalesce.fpga (Linux) + ``` + +### Example of Output + +``` +Loop Coalesce: 1 -- kernel time : 156 microseconds +Throughput for kernel with coalesce_factor 1: 6550KB/S +Loop Coalesce: 2 -- kernel time : 113 microseconds +Throughput for kernel with coalesce_factor 2: 9064KB/S +PASSED: The results are correct + +``` + +### Discussion of Results +The execution time and throughput for each kernel is displayed. Applying the `loop_coalesce` attribute in this example reduced the kernel execution time by a factor of ~1.5. Note that you will only see this result when executing on FPGA hardware. The emulator will generally not reflect performance differences. + diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/loop_coalesce.sln b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/loop_coalesce.sln new file mode 100755 index 0000000000..ba59611875 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/loop_coalesce.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.28307.705 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "loop_coalesce", "loop_coalesce.vcxproj", "{CF6A576B-665D-4F24-BB62-0DAE7A7B3C64}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {CF6A576B-665D-4F24-BB62-0DAE7A7B3C64}.Debug|x64.ActiveCfg = Debug|x64 + {CF6A576B-665D-4F24-BB62-0DAE7A7B3C64}.Debug|x64.Build.0 = Debug|x64 + {CF6A576B-665D-4F24-BB62-0DAE7A7B3C64}.Release|x64.ActiveCfg = Release|x64 + {CF6A576B-665D-4F24-BB62-0DAE7A7B3C64}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + 
GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {92BEFAAB-0365-4E5A-9C4A-E50AB49B2A6B} + EndGlobalSection +EndGlobal \ No newline at end of file diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/loop_coalesce.vcxproj b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/loop_coalesce.vcxproj new file mode 100755 index 0000000000..ee6a1746e4 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/loop_coalesce.vcxproj @@ -0,0 +1,161 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + + + + + + 15.0 + {cf6a576b-665d-4f24-bb62-0dae7a7b3c64} + Win32Proj + loop_coalesce + 10.0.17763.0 + + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + + + + + + + + + + + + + + + + + + + true + + + true + + + false + + + false + + + + Use + Level3 + Disabled + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + Disabled + true + true + pch.h + true + -DFPGA_EMULATOR %(AdditionalOptions) + $(IntDir)loop_coalesce.obj + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + true + -DFPGA_EMULATOR %(AdditionalOptions) + $(IntDir)loop_coalesce.obj + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + + \ No newline at end of file diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/sample.json b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/sample.json new file mode 100755 index 0000000000..c43debe7c9 --- /dev/null 
+++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/sample.json @@ -0,0 +1,51 @@ +{ + "guid": "370A5B2B-EBB3-4E7F-89F3-73D333522215", + "name": "Coalescing Nested Loops", + "categories": ["Toolkit/Intel® oneAPI Base Toolkit/FPGA/Tutorials"], + "description": "FPGA tutorial demonstrating the loop_coalesce attribute", + "toolchain": ["dpcpp"], + "os": ["linux", "windows"], + "targetDevice": ["FPGA"], + "builder": ["ide", "cmake"], + "languages": [{"cpp":{}}], + "ciTests": { + "linux": [ + { + "id": "fpga_emu", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make fpga_emu", + "./loop_coalesce.fpga_emu" + ] + }, + { + "id": "report", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make report" + ] + } + ], + "windows": [ + { + "id": "fpga_emu", + "steps": [ + "cd src", + "ninja fpga_emu", + "loop_coalesce.fpga_emu.exe" + ] + }, + { + "id": "report", + "steps": [ + "cd src", + "ninja report" + ] + } + ] + } +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/src/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/src/CMakeLists.txt new file mode 100755 index 0000000000..bf71de4094 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/src/CMakeLists.txt @@ -0,0 +1,88 @@ +set(SOURCE_FILE loop_coalesce.cpp) +set(TARGET_NAME loop_coalesce) + +set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu) +set(FPGA_TARGET ${TARGET_NAME}.fpga) + +# Intel supported FPGA Boards and their names +set(A10_PAC_BOARD_NAME "intel_a10gx_pac:pac_a10") +set(S10_PAC_BOARD_NAME "intel_s10sx_pac:pac_s10") + +# Assume target is the Intel(R) PAC with Intel Arria(R) 10 GX FPGA +SET(_FPGA_BOARD ${A10_PAC_BOARD_NAME}) + +# Check if target is the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA +IF (NOT DEFINED FPGA_BOARD) + MESSAGE(STATUS "\tFPGA_BOARD was not specified. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. 
Please refer to the README for more information on how to run the design on the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${A10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${S10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Stratix(R) 10 SX FPGA.") + SET(_FPGA_BOARD ${S10_PAC_BOARD_NAME}) + +ELSE() + MESSAGE(STATUS "\tAn invalid board name was passed in using the FPGA_BOARD flag. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. Please refer to the README for the list of valid board names.") +ENDIF() + +set(HARDWARE_COMPILE_FLAGS "-fintelfpga") + +# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation +set(HARDWARE_LINK_FLAGS "-fintelfpga -Xshardware -Xsboard=${_FPGA_BOARD} ${USER_HARDWARE_FLAGS}") + +set(EMULATOR_COMPILE_FLAGS "-fintelfpga -DFPGA_EMULATOR") +set(EMULATOR_LINK_FLAGS "-fintelfpga") + +# fpga emulator +if(WIN32) + set(WIN_EMULATOR_TARGET ${EMULATOR_TARGET}.exe) + add_custom_target(fpga_emu DEPENDS ${WIN_EMULATOR_TARGET}) + separate_arguments(WIN_EMULATOR_COMPILE_FLAGS WINDOWS_COMMAND "${EMULATOR_COMPILE_FLAGS}") + add_custom_command(OUTPUT ${WIN_EMULATOR_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} ${WIN_EMULATOR_COMPILE_FLAGS} /GX ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${WIN_EMULATOR_TARGET} + DEPENDS ${SOURCE_FILE}) +else() + add_executable(${EMULATOR_TARGET} ${SOURCE_FILE}) + add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS ${EMULATOR_COMPILE_FLAGS}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS ${EMULATOR_LINK_FLAGS}) +endif() + +# fpga +if(WIN32) + add_custom_target(fpga + COMMAND 
echo "FPGA hardware flow is not supported in Windows") +else() + add_executable(${FPGA_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILE}) + add_custom_target(fpga DEPENDS ${FPGA_TARGET}) + set_target_properties(${FPGA_TARGET} PROPERTIES COMPILE_FLAGS ${HARDWARE_COMPILE_FLAGS}) + set_target_properties(${FPGA_TARGET} PROPERTIES LINK_FLAGS ${HARDWARE_LINK_FLAGS}) +endif() + +# generate report +if(WIN32) + set(DEVICE_OBJ_FILE ${TARGET_NAME}_report.a) + add_custom_target(report DEPENDS ${DEVICE_OBJ_FILE}) + + separate_arguments(HARDWARE_LINK_FLAGS_LIST WINDOWS_COMMAND "${HARDWARE_LINK_FLAGS}") + add_custom_command(OUTPUT ${DEVICE_OBJ_FILE} + COMMAND ${CMAKE_CXX_COMPILER} /EHsc ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${DEVICE_OBJ_FILE} + DEPENDS ${SOURCE_FILE}) + +else() + set(DEVICE_OBJ_FILE ${TARGET_NAME}_report.a) + add_custom_target(report DEPENDS ${DEVICE_OBJ_FILE}) + + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} ${SOURCE_FILE} COPYONLY) + + separate_arguments(HARDWARE_LINK_FLAGS_LIST UNIX_COMMAND "${HARDWARE_LINK_FLAGS}") + add_custom_command(OUTPUT ${DEVICE_OBJ_FILE} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link ${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${DEVICE_OBJ_FILE} + DEPENDS ${SOURCE_FILE}) +endif() + +# run +add_custom_target(run + COMMAND ../${TARGET_NAME}.fpga_emu + DEPENDS ${TARGET_NAME}.fpga_emu) \ No newline at end of file diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/src/build.ninja b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/src/build.ninja new file mode 100755 index 0000000000..edc74950ec --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/src/build.ninja @@ -0,0 +1,30 @@ +source_file = loop_coalesce.cpp +target_name = loop_coalesce + +emulator_target = ${target_name}.fpga_emu.exe +report_target = ${target_name}_report.a +report_target_s10_pac = 
${target_name}_s10_pac_report.a + +hardware_flags = -fintelfpga -Xshardware +emulator_flags = -fintelfpga -DFPGA_EMULATOR + +rule build_fpga_emu + command = dpcpp /GX ${emulator_flags} $in -o $out + +rule gen_report + command = dpcpp /GX ${hardware_flags} -Xsboard=intel_a10gx_pac:pac_a10 -fsycl-link $in -o $out + +rule gen_report_s10_pac + command = dpcpp /GX ${hardware_flags} -Xsboard=intel_s10sx_pac:pac_s10 -fsycl-link $in -o $out + +# FPGA emulator +build fpga_emu: phony ${emulator_target} +build ${emulator_target}: build_fpga_emu ${source_file} + +# report +build report: phony ${report_target} +build ${report_target}: gen_report ${source_file} + +# report (S10 PAC) +build report_s10_pac: phony ${report_target_s10_pac} +build ${report_target_s10_pac}: gen_report_s10_pac ${source_file} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/src/loop_coalesce.cpp b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/src/loop_coalesce.cpp new file mode 100755 index 0000000000..a779bec4b1 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_coalesce/src/loop_coalesce.cpp @@ -0,0 +1,176 @@ +//============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +#include +#include +#include +#include +#include "dpc_common.hpp" + +using namespace sycl; + +// Matrix dimensions +constexpr size_t kNumRows = 4; +constexpr size_t kNumCols = 4; +constexpr size_t kNumElements = kNumRows * kNumCols; + +// Total floating point ops performed by the kernel +constexpr size_t kTotalOps = (4 + (3*kNumCols)) * kNumElements; + + +// Forward declare the kernel name +// (This will become unnecessary in a future compiler version.) +template class KernelCompute; + +// The kernel implements a matrix multiplication. +// This is not meant to be a high performance implementation on FPGA! 
+// It's just a simple kernel with nested loops to illustrate loop coalescing. +// The product is written to res; the kernel time and a throughput figure are printed to std::cout. +// On a SYCL exception it prints a diagnostic and calls std::terminate(). +template +void MatrixMultiply(const device_selector &selector, + const std::vector &matrix_a, + const std::vector &matrix_b, + std::vector &res) { + double kernel_time = 0.0; + try { + auto prop_list = property_list{property::queue::enable_profiling()}; + + queue q(selector, dpc_common::exception_handler, prop_list); + + buffer buffer_in_a(matrix_a); + buffer buffer_in_b(matrix_b); + // Use verbose SYCL 1.2 syntax for the output buffer. + // (This will become unnecessary in a future compiler version.) + buffer buffer_out(res.data(), kNumElements); + + event e = q.submit([&](handler &h) { + auto accessor_matrix_a = buffer_in_a.get_access(h); + auto accessor_matrix_b = buffer_in_b.get_access(h); + auto accessor_res = buffer_out.get_access(h); + + // The kernel_args_restrict promises the compiler that this kernel's + // accessor arguments won't alias (i.e. non-overlapping memory regions). + h.single_task>( + [=]() [[intel::kernel_args_restrict]] { + size_t idx = 0; + float a[kNumRows][kNumCols]; + float b[kNumRows][kNumCols]; + float tmp[kNumRows][kNumCols]; + + // The loop_coalesce instructs the compiler to attempt to "merge" + // coalesce_factor loop levels of this nested loop together. + // For example, a coalesce_factor of 2 turns this into a single loop. + [[intelfpga::loop_coalesce(coalesce_factor)]] + for (size_t i = 0; i < kNumRows; ++i) { + for (size_t j = 0; j < kNumCols; ++j) { + a[i][j] = accessor_matrix_a[idx]; + b[i][j] = accessor_matrix_b[idx]; + tmp[i][j] = 0.0; + idx++; + } + } + + // Applying loop_coalesce to the outermost loop of a deeply nested + // loop results in coalescing from the outside in. + // For example, a coalesce_factor of 2 coalesces the "i" and "j" loops, + // making a doubly nested loop. 
+ [[intelfpga::loop_coalesce(coalesce_factor)]] + for (size_t i = 0; i < kNumRows; ++i) { + for (size_t j = 0; j < kNumCols; ++j) { + float sum = 0.0f; + for (size_t k = 0; k < kNumCols; ++k) { + sum += a[i][k] * b[k][j]; + } + tmp[i][j] = sum; + } + } + + // Copy tmp to the output accessor, one element per idx step (j varies fastest) + idx = 0; + [[intelfpga::loop_coalesce(coalesce_factor)]] + for (size_t i = 0; i < kNumRows; ++i) { + for (size_t j = 0; j < kNumCols; ++j) { + accessor_res[idx] = tmp[i][j]; + idx++; + } + } + + }); + }); + + // Kernel profiling data + double start = e.get_profiling_info(); + double end = e.get_profiling_info(); + // convert nanoseconds to microseconds + kernel_time = (double)(end - start) * 1e-3; + + } catch (exception const &exc) { + std::cout << "Caught synchronous SYCL exception:\n" << exc.what() << '\n'; + if (exc.get_cl_code() == CL_DEVICE_NOT_FOUND) { + std::cout << "If you are targeting an FPGA, please ensure that your " + "system has a correctly configured FPGA board.\n"; + std::cout << "If you are targeting the FPGA emulator, compile with " + "-DFPGA_EMULATOR.\n"; + } + std::terminate(); + } + + std::cout << "Loop Coalesce: " << coalesce_factor + << " -- kernel time : " << kernel_time << " microseconds\n"; + std::cout << "Throughput for kernel with coalesce_factor " << coalesce_factor + << ": "; + // Printed figure: kTotalOps * sizeof(float) scaled by 1e-3, divided by the kernel time converted from microseconds to seconds + std::cout << std::fixed << std::setprecision(0) + << (((double)kTotalOps * sizeof(float) * 1e-3f) / + (kernel_time * 1e-6f)) << "KB/s\n"; +} + +// Builds an identity matrix A and a matrix B with B[i][j] = i * j + 1, runs the kernel with +// coalesce factors 1 and 2, and checks that both outputs equal B. +// Returns 0 when every element matches and -1 otherwise. +int main() { + std::vector matrix_a(kNumElements); + std::vector matrix_b(kNumElements); + std::vector matrix_output_no_col(kNumElements); + std::vector matrix_output(kNumElements); + + // Specify the matrices to be multiplied + for (size_t i = 0; i < kNumRows; i++) { + size_t pos = i * kNumCols; + // Initialize A as identity matrix + matrix_a[i + pos] = 1.0; + for (size_t j = 0; j < kNumCols; j++) { + matrix_b[pos + j] = i * j + 1; + } + } + +#if defined(FPGA_EMULATOR) + intel::fpga_emulator_selector selector; +#else + intel::fpga_selector selector; +#endif + + // Two 
versions of the simple matrix multiply kernel will be enqueued: + // - with coalesce_factor=1 (i.e. no loop coalescing) + // - with coalesce_factor=2 (coalesce two nested levels) + MatrixMultiply<1>(selector, matrix_a, matrix_b, matrix_output_no_col); + MatrixMultiply<2>(selector, matrix_a, matrix_b, matrix_output); + + // Correctness check + // A is the identity, so each output element is compared with the value stored in B (i * j + 1). + // The loop does not stop at the first mismatch; a message is printed for every failing element. + bool passed = true; + for (size_t i = 0; i < kNumRows; i++) { + size_t pos = i * kNumCols; + for (size_t j = 0; j < kNumCols; j++) { + float val_noCol = matrix_output_no_col[pos + j]; + float val = matrix_output[pos + j]; + if (val_noCol != i * j + 1 || val != i * j + 1) { + std::cout << "FAILED: The results are incorrect\n"; + passed = false; + } + } + } + + if (passed) { + std::cout << "PASSED: The results are correct\n"; + return 0; + } else { + std::cout << "FAILED\n"; + return -1; + } +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/CMakeLists.txt new file mode 100755 index 0000000000..3805253a8e --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/CMakeLists.txt @@ -0,0 +1,11 @@ +set(CMAKE_CXX_COMPILER "dpcpp") + +cmake_minimum_required (VERSION 2.8) + +project(LoopIvdep) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +add_subdirectory (src) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/License.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/License.txt new file mode 100755 index 0000000000..e63c6e13dc --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/License.txt @@ -0,0 +1,7 @@ +Copyright Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, 
including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/README.md b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/README.md new file mode 100755 index 0000000000..2cd79d752d --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/README.md @@ -0,0 +1,251 @@ + + +# Loop `ivdep` Attribute +This FPGA tutorial demonstrates how to apply the `ivdep` attribute to a loop to aid the compiler's loop dependence analysis. + +***Documentation***: The [oneAPI DPC++ FPGA Optimization Guide](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide) provides comprehensive instructions for targeting FPGAs through DPC++. The [oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide) is a general resource for target-independent DPC++ programming. + +| Optimized for | Description +--- |--- +| OS | Linux* Ubuntu* 18.04; Windows* 10 +| Hardware | Intel® Programmable Acceleration Card (PAC) with Intel Arria® 10 GX FPGA;
Intel® Programmable Acceleration Card (PAC) with Intel Stratix® 10 SX FPGA +| Software | Intel® oneAPI DPC++ Compiler (Beta)
Intel® FPGA Add-On for oneAPI Base Toolkit +| What you will learn | Basics of loop-carried dependencies
The notion of a loop-carried dependence distance
What constitutes a *safe* dependence distance
How to aid the compiler's dependence analysis to maximize performance +| Time to complete | 30 minutes + +_Notice: Limited support in Windows*; compiling for FPGA hardware is not supported in Windows*_ + +## Purpose +In order to understand and apply `ivdep` to loops in your design, it is necessary to understand the concepts of loop-carried memory dependencies. Unlike many other attributes that can be used to improve a design's performance, `ivdep` has functional implications. Using it incorrectly will result in undefined behavior for your design! + +### Loop-Carried Memory Dependencies +A *loop-carried memory dependency* refers to a situation where memory access in a given loop iteration cannot proceed until a memory access from a previous loop iteration is completed. Loop-carried dependencies can be categorized into the following cases: +* **True-dependence (Read-After-Write)** - A memory location read in an iteration that must occur after a previous iteration writes to the same memory location. +* **Anti-dependence (Write-After-Read)** - A memory location read must occur before a future iteration writes to the same memory location. +* **Output-dependence (Write-After-Write)** - A memory location write must occur before a future iteration writes to the same memory location. + +The Intel® oneAPI DPC++ Compiler (Beta) employs static analysis to scan the program's code to establish the dependence relationships between all memory accesses in a loop. However, depending on the complexity of the addressing expressions and the loop's stride or upper bound, the compiler may not be able to statically determine precise dependence information. + +In such scenarios, the compiler must conservatively assume some statements to be dependent in order to guarantee functional correctness of the generated hardware. Precise dependence information is crucially important to generate an efficient pipelined datapath. 
Such information reduces the number of assumed dependencies, allowing the hardware schedule to extract as much pipeline parallelism from loops as possible. + +#### Example 1: Basic true-dependence +Each iteration of the loop reads a value from memory location that is written to in the previous iteration. The pipelined datapath generated by the compiler cannot issue a new iteration until the previous iteration is complete. + +```c++ +for(i = 1; i < n; i++){ + S: a[i] = a[i-1]; +} +``` + +#### Example 2: Complex or statically-unknown indexing expression +The compiler cannot statically infer the true access pattern for the loads from array `a`. To guarantee functional correctness, the compiler must conservatively assume the statements in the loop to be dependent across all iterations. The resulting generated datapath issues new iterations, similar to the example 1, executing one iteration at a time. +```c++ +for(i = 0; i < n; i++){ + S: a[i] = a[b[i]]; +} +``` + +#### Example 3: Loop-independent dependence +Some memory dependencies in program code do not span multiple iterations of a loop. In the following example code, dependencies from statement `S2` on `S1` and from statement `S3` on `S1` are referred to as loop-independent memory dependencies. Such dependencies do not prevent the compiler from generating an efficient pipelined loop datapath and are not considered in this tutorial. +```c++ +for(i = 0; i < n; i++){ + S1: a[i] = foo(); + ... + S2: b[i] = a[i]; +} +for(j = 0; j < m; j++){ + S3: A[i] = bar(); +} +``` + +### Loop-carried dependence distance +Imagine loop-carried dependencies in terms of the distance between the dependence source and sink statements, measured in the number of iterations of the loop containing the statements. In example 1, the dependence source (store into array `a`) and dependence sink (load from the same index in array `a`) are one iteration apart. 
That is, for the specified memory location, the data is read one iteration after it was written. Therefore, this true dependence has a distance of 1. In many cases, the compiler loop dependence analysis may be able to statically determine the dependence distance. + +#### Example 4: Simple dependence distance +The compiler's static analysis facilities can infer that the distance of the true dependence in the following example code is 10 iterations. This has an impact on the scheduling of how iterations of the loop are issued into the generated pipelined datapath. For example, iteration `k` may not begin executing the load from array `a` before iteration `(k-10)` has completed storing the data into the same memory location. However, iterations `[k-9,k)` are not subject to a scheduling constraint from the store in iteration `(k-10)` and may begin execution earlier. +```c++ +for(i = 10; i < n; i++){ + S: a[i] = a[i-10]; +} +``` + +#### Example 5: Dependence distance across multiple loops in a nest +Statement `S`, in the code snippet that follows, forms two distinct true dependencies, one carried by loop `L1` and one by loop `L2`. Across iterations of loop `L1`, data is stored into a location in array `a` that is read in the next iteration. Similarly, across iterations of loop `L2`, data is stored into a location in array `a` that is read in a later iteration. In the latter case, the dependence across loop `L2` has a dependence distance of 2. In the former, the dependence across loop `L1` has a dependence distance of 1. Special care must be taken when reasoning about loop-carried memory dependencies spanning multiple loops. +```c++ +L1: for(i = 1; i < n; i++){ + L2: for(j = 2; j < m; j++){ + S: a[i][j] = a[i-1][j-2]; + } +} +``` + +### Specifying that memory accesses do *not* cause loop-carried dependencies +Apply the `ivdep` attribute to a loop to inform the compiler that ***none*** of the memory accesses within a loop incur loop-carried dependencies.
+```c++ +[[intelfpga::ivdep]] +for (int i = 0; i < N; i++) { + A[i] = A[i - X[i]]; +} +``` +The `ivdep` attribute indicates to the compiler that it can disregard assumed loop-carried memory dependencies and generate a pipelined datapath for this loop capable of issuing new iterations as soon as possible (every cycle), maximizing possible throughput. + +### Specifying that memory accesses do *not* cause loop-carried dependencies across a fixed distance +Apply the `ivdep` attribute with a `safelen` parameter to set a specific lower bound on the dependence distance that can possibly be attributed to loop-carried dependencies in the associated loop. +```c++ +// n is a constant expression of integer type +[[intelfpga::ivdep(n)]] +for (int i = 0; i < N; i++) { + A[i] = A[i - X[i]]; +} +``` +The `ivdep` attribute informs the compiler to generate a pipelined loop datapath that can issue a new iteration as soon as the iteration `n` iterations ago has completed. The attribute parameter (`safelen`) is a refinement of the compiler static loop-carried dependence analysis that infers the dependence present in the code but is otherwise unable to accurately determine its distance. + +***IMPORTANT***: Applying the `ivdep` attribute or the `ivdep` attribute with a `safelen` parameter may lead to incorrect results if the annotated loop exhibits loop-carried memory dependencies. The attribute directs the compiler to generate hardware assuming no loop-carried dependencies. Specifying this assumption incorrectly is an invalid use of the attribute, and results in undefined (and likely incorrect) behavior. + +### Testing the Tutorial +In `loop_ivdep.cpp`, the `ivdep` attribute is applied to the kernel work loop with a `safelen` parameter of 1 and 128. 
+```c++ + TransposeAndFold(selector, A, B); // kMinSafelen = 1 + TransposeAndFold(selector, A, C); // kMaxSafelen = 128 +``` +The `ivdep` attribute with `safelen` parameter equal to 1 informs the compiler that any loop-carried memory dependence among iterations of the associated loop has a distance of at least 1. Because every loop-carried dependence has a distance of at least 1 by definition, the attribute is redundant and is equivalent to the code without the attribute in place. + +**_Try this!_**: Compile the tutorial program in `loop_ivdep.cpp` with and without the `[[intelfpga::ivdep]]` attribute altogether and compare the resulting reports. + +The `ivdep` attribute with `safelen` parameter equal to 128 is reflective of the maximum number of iterations of the associated loop among which no loop-carried memory dependence occurs. The annotated loop nest contains a dependence on values of array `temp_buffer`: + +```c++ +for (size_t j = 0; j < kMatrixSize * kRowLength; j++) { + for (size_t i = 0; i < kRowLength; i++) { + temp_buffer[j % kRowLength][i] += in_buffer[i][j % kRowLength]; + } +} +``` +Observe that the indexing expression on `temp_buffer` evaluates to the same index every `kRowLength` iterations of the `j` loop. Specifying the `ivdep` attribute on the `j` loop without a `safelen` parameter, or with a `safelen` parameter > `kRowLength`, leads to undefined behavior because the generated hardware does not adhere to the ordering constraint imposed by the dependence. Specifying the `ivdep` attribute with a `safelen` parameter <= `kRowLength` is valid and results in better performance. + +## Key Concepts +* Basics of loop-carried dependencies +* The notion of a loop-carried dependence distance +* Determining what constitutes a *safe* dependence distance +* How to aid the compiler's dependence analysis to maximize performance + +## License +This code sample is licensed under MIT license.
+ + +## Building the `loop_ivdep` Tutorial + +### Include Files +The included header `dpc_common.hpp` is located at `%ONEAPI_ROOT%\dev-utilities\latest\include` on your development system. + +### Running Samples in DevCloud +If running a sample in the Intel DevCloud, remember that you must specify the compute node (fpga_compile or fpga_runtime) as well as whether to run in batch or interactive mode. For more information see the Intel® oneAPI Base Toolkit Get Started Guide ([https://devcloud.intel.com/oneapi/get-started/base-toolkit/](https://devcloud.intel.com/oneapi/get-started/base-toolkit/)). + +When compiling for FPGA hardware, it is recommended to increase the job timeout to 12h. + +### On a Linux* System + +1. Generate the `Makefile` by running `cmake`. + ``` + mkdir build + cd build + ``` + To compile for the Intel® PAC with Intel Arria® 10 GX FPGA, run `cmake` using the command: + ``` + cmake .. + ``` + Alternatively, to compile for the Intel® PAC with Intel Stratix® 10 SX FPGA, run `cmake` using the command: + + ``` + cmake .. -DFPGA_BOARD=intel_s10sx_pac:pac_s10 + ``` + +2. Compile the design through the generated `Makefile`. The following build targets are provided, matching the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + make fpga_emu + ``` + * Generate the optimization report: + ``` + make report + ``` + * Compile for FPGA hardware (longer compile time, targets FPGA device): + ``` + make fpga + ``` +3. (Optional) As the above hardware compile may take several hours to complete, an Intel® PAC with Intel Arria® 10 GX FPGA precompiled binary can be downloaded here. + +### On a Windows* System +Note: `cmake` is not yet supported on Windows. A build.ninja file is provided instead. + +1. Enter the source file directory. + ``` + cd src + ``` + +2. Compile the design. 
The following build targets are provided, matching the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + ninja fpga_emu + ``` + + * Generate the optimization report: + + ``` + ninja report + ``` + If you are targeting Intel® PAC with Intel Stratix® 10 SX FPGA, instead use: + ``` + ninja report_s10_pac + ``` + * Compiling for FPGA hardware is not yet supported on Windows. + + ### In Third-Party Integrated Development Environments (IDEs) + +You can compile and run this tutorial in the Eclipse* IDE (in Linux*) and the Visual Studio* IDE (in Windows*). For instructions, refer to the following link: [Intel® oneAPI DPC++ FPGA Workflows on Third-Party IDEs](https://software.intel.com/en-us/articles/intel-oneapi-dpcpp-fpga-workflow-on-ide) + +## Examining the Reports +Locate `report.html` in the `loop_ivdep_report.prj/reports/` or `loop_ivdep_s10_pac_report.prj/reports/` directory. Open the report in any of Chrome*, Firefox*, Edge*, or Internet Explorer*. + +Navigate to the Loops Analysis section of the optimization report and look at the initiation interval (II) achieved by the two versions of the kernel. +* **`safelen(1)`** The II reported for this version of the kernel is 5 cycles. +You should see a message similar to "Compiler failed to schedule this loop with smaller II due to memory dependency." +* **`safelen(128)`** The II reported for this version of the kernel is 1 cycle, the optimal result. You should see a message similar to "a new iteration is issued into the pipelined loop datapath on every cycle". + + +## Running the Sample + + 1. Run the sample on the FPGA emulator (the kernel executes on the CPU): + ``` + ./loop_ivdep.fpga_emu (Linux) + loop_ivdep.fpga_emu.exe (Windows) + ``` +2.
Run the sample on the FPGA device: + ``` + ./loop_ivdep.fpga (Linux) + ``` + +### Example of Output + +``` +safe_len: 1 -- kernel time : 50.9517 ms +Throughput for kernel with safe_len 1: 1286KB/s +safe_len: 128 -- kernel time : 10 ms +Throughput for kernel with safe_len 128: 6277KB/s +PASSED: The results are correct +``` + +### Discussion of Results + +The following table summarizes the execution time (in ms) and throughput (in KB/s) for `safelen` parameters of 1 (redundant attribute) and 128 (`kRowLength`) for a default input matrix size of 128 x 128 floats on Intel® Programmable Acceleration Card with Intel® Arria® 10 GX FPGA and the Intel® oneAPI DPC++ Compiler (Beta). + +Safelen | Kernel Time (ms) | Throughput (KB/s) +------------- | ------------- | ----------------------- +1 | 50 | 1320 +128 | 10 | 6403 + +With the `ivdep` attribute applied with the maximum safe `safelen` parameter, the kernel execution time is decreased by a factor of ~5. + +Note that this performance difference will be apparent only when running on FPGA hardware. The emulator, while useful for verifying functionality, will generally not reflect differences in performance.
diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/loop_ivdep.sln b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/loop_ivdep.sln new file mode 100755 index 0000000000..5f1a9b42a8 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/loop_ivdep.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.28307.705 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "loop_ivdep", "loop_ivdep.vcxproj", "{3F5364B3-F987-4676-89A5-1F19BA3D4B75}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {3F5364B3-F987-4676-89A5-1F19BA3D4B75}.Debug|x64.ActiveCfg = Debug|x64 + {3F5364B3-F987-4676-89A5-1F19BA3D4B75}.Debug|x64.Build.0 = Debug|x64 + {3F5364B3-F987-4676-89A5-1F19BA3D4B75}.Release|x64.ActiveCfg = Release|x64 + {3F5364B3-F987-4676-89A5-1F19BA3D4B75}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {C0550E85-8C31-40EE-BFFA-F267DC16329D} + EndGlobalSection +EndGlobal diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/loop_ivdep.vcxproj b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/loop_ivdep.vcxproj new file mode 100755 index 0000000000..ed0fb51757 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/loop_ivdep.vcxproj @@ -0,0 +1,160 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + + + + + + 15.0 + {3f5364b3-f987-4676-89a5-1f19ba3d4b75} + Win32Proj + loop_ivdep + $(WindowsSDKVersion.Replace("\","")) + + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI 
DPC++ Compiler + true + Unicode + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + + + + + + + + + + + + + + + + + + + true + + + true + + + false + + + false + + + + Use + Level3 + Disabled + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + Disabled + true + true + pch.h + true + -DFPGA_EMULATOR %(AdditionalOptions) + $(IntDir)loop_ivdep.obj + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + true + -DFPGA_EMULATOR %(AdditionalOptions) + $(IntDir)loop_ivdep.obj + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + + diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/sample.json b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/sample.json new file mode 100755 index 0000000000..b020452df4 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/sample.json @@ -0,0 +1,51 @@ +{ + "guid": "CD8FE0A5-B31A-4906-8386-27416361FE24", + "name": "Loop IVDep Attribute", + "categories": ["Toolkit/Intel® oneAPI Base Toolkit/FPGA/Tutorials"], + "description": "FPGA tutorial demonstrating the usage of the loop ivdep attribute", + "toolchain": ["dpcpp"], + "os": ["linux", "windows"], + "targetDevice": ["FPGA"], + "builder": ["ide", "cmake"], + "languages": [{"cpp":{}}], + "ciTests": { + "linux": [ + { + "id": "fpga_emu", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make fpga_emu", + "./loop_ivdep.fpga_emu" + ] + }, + { + "id": "report", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make report" + ] + } + ], + "windows": [ + { + "id": "fpga_emu", + "steps": [ + "cd src", + 
"ninja fpga_emu", + "loop_ivdep.fpga_emu.exe" + ] + }, + { + "id": "report", + "steps": [ + "cd src", + "ninja report" + ] + } + ] + } +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/src/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/src/CMakeLists.txt new file mode 100755 index 0000000000..03d22779ce --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/src/CMakeLists.txt @@ -0,0 +1,89 @@ +set(SOURCE_FILE loop_ivdep.cpp) +set(TARGET_NAME loop_ivdep) + +set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu) +set(FPGA_TARGET ${TARGET_NAME}.fpga) + +# Intel supported FPGA Boards and their names +set(A10_PAC_BOARD_NAME "intel_a10gx_pac:pac_a10") +set(S10_PAC_BOARD_NAME "intel_s10sx_pac:pac_s10") + +# Assume target is the Intel(R) PAC with Intel Arria(R) 10 GX FPGA +SET(_FPGA_BOARD ${A10_PAC_BOARD_NAME}) + +# Check if target is the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA +IF (NOT DEFINED FPGA_BOARD) + MESSAGE(STATUS "\tFPGA_BOARD was not specified. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. Please refer to the README for more information on how to run the design on the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${A10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${S10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Stratix(R) 10 SX FPGA.") + SET(_FPGA_BOARD ${S10_PAC_BOARD_NAME}) + +ELSE() + MESSAGE(STATUS "\tAn invalid board name was passed in using the FPGA_BOARD flag. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. 
Please refer to the README for the list of valid board names.") +ENDIF() + +set(HARDWARE_COMPILE_FLAGS "-fintelfpga") + +# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation +set(HARDWARE_LINK_FLAGS "-fintelfpga -Xshardware -Xsboard=${_FPGA_BOARD} ${USER_HARDWARE_FLAGS}") + +set(EMULATOR_COMPILE_FLAGS "-fintelfpga -DFPGA_EMULATOR") +set(EMULATOR_LINK_FLAGS "-fintelfpga") + +# fpga emulator +if(WIN32) + set(WIN_EMULATOR_TARGET ${EMULATOR_TARGET}.exe) + add_custom_target(fpga_emu DEPENDS ${WIN_EMULATOR_TARGET}) + separate_arguments(WIN_EMULATOR_COMPILE_FLAGS WINDOWS_COMMAND "${EMULATOR_COMPILE_FLAGS}") + add_custom_command(OUTPUT ${WIN_EMULATOR_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} ${WIN_EMULATOR_COMPILE_FLAGS} /GX ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${WIN_EMULATOR_TARGET} + DEPENDS ${SOURCE_FILE}) +else() + add_executable(${EMULATOR_TARGET} ${SOURCE_FILE}) + add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS ${EMULATOR_COMPILE_FLAGS}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS ${EMULATOR_LINK_FLAGS}) +endif() + + +# fpga +if(WIN32) + add_custom_target(fpga + COMMAND echo "FPGA hardware flow is not supported in Windows") +else() + add_executable(${FPGA_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILE}) + add_custom_target(fpga DEPENDS ${FPGA_TARGET}) + set_target_properties(${FPGA_TARGET} PROPERTIES COMPILE_FLAGS ${HARDWARE_COMPILE_FLAGS}) + set_target_properties(${FPGA_TARGET} PROPERTIES LINK_FLAGS ${HARDWARE_LINK_FLAGS}) +endif() + +# generate report +if(WIN32) + set(DEVICE_OBJ_FILE ${TARGET_NAME}_report.a) + add_custom_target(report DEPENDS ${DEVICE_OBJ_FILE}) + + separate_arguments(HARDWARE_LINK_FLAGS_LIST WINDOWS_COMMAND "${HARDWARE_LINK_FLAGS}") + add_custom_command(OUTPUT ${DEVICE_OBJ_FILE} + COMMAND ${CMAKE_CXX_COMPILER} /EHsc ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link 
${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${DEVICE_OBJ_FILE} + DEPENDS ${SOURCE_FILE}) + +else() + set(DEVICE_OBJ_FILE ${TARGET_NAME}_report.a) + add_custom_target(report DEPENDS ${DEVICE_OBJ_FILE}) + + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} ${SOURCE_FILE} COPYONLY) + + separate_arguments(HARDWARE_LINK_FLAGS_LIST UNIX_COMMAND "${HARDWARE_LINK_FLAGS}") + add_custom_command(OUTPUT ${DEVICE_OBJ_FILE} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link ${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${DEVICE_OBJ_FILE} + DEPENDS ${SOURCE_FILE}) +endif() + +# run +add_custom_target(run + COMMAND ../${TARGET_NAME}.fpga_emu + DEPENDS ${TARGET_NAME}.fpga_emu) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/src/build.ninja b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/src/build.ninja new file mode 100755 index 0000000000..f076e77a88 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/src/build.ninja @@ -0,0 +1,30 @@ +source_file = loop_ivdep.cpp +target_name = loop_ivdep + +emulator_target = ${target_name}.fpga_emu.exe +report_target = ${target_name}_report.a +report_target_s10_pac = ${target_name}_s10_pac_report.a + +hardware_flags = -fintelfpga -Xshardware +emulator_flags = -fintelfpga -DFPGA_EMULATOR + +rule build_fpga_emu + command = dpcpp /GX ${emulator_flags} $in -o $out + +rule gen_report + command = dpcpp /GX ${hardware_flags} -Xsboard=intel_a10gx_pac:pac_a10 -fsycl-link $in -o $out + +rule gen_report_s10_pac + command = dpcpp /GX ${hardware_flags} -Xsboard=intel_s10sx_pac:pac_s10 -fsycl-link $in -o $out + +# FPGA emulator +build fpga_emu: phony ${emulator_target} +build ${emulator_target}: build_fpga_emu ${source_file} + +# report +build report: phony ${report_target} +build ${report_target}: gen_report ${source_file} + +# report (S10 PAC) +build report_s10_pac: phony ${report_target_s10_pac} +build ${report_target_s10_pac}: 
gen_report_s10_pac ${source_file} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/src/loop_ivdep.cpp b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/src/loop_ivdep.cpp new file mode 100755 index 0000000000..f2ddc838ac --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_ivdep/src/loop_ivdep.cpp @@ -0,0 +1,127 @@ +//============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +#include +#include +#include +#include +#include "dpc_common.hpp" + +constexpr size_t kRowLength = 128; +constexpr size_t kMinSafelen = 1; +constexpr size_t kMaxSafelen = kRowLength; +constexpr size_t kMatrixSize = kRowLength * kRowLength; + +using namespace sycl; + +template class KernelCompute; + +template +void TransposeAndFold(const device_selector &selector, + const std::array &m_input, + std::array &m_output) { + double kernel_time = 0; + try { + queue q(selector, dpc_common::exception_handler, + property::queue::enable_profiling{}); + + buffer buffer_input(m_input); + // Use verbose SYCL 1.2 syntax for the output buffer. + // (This will become unnecessary in a future compiler version.) + buffer buffer_output(m_output.data(), kMatrixSize); + + event e = q.submit([&](handler &h) { + auto accessor_input = buffer_input.get_access(h); + auto accessor_output = buffer_output.get_access(h); + + h.single_task>([=]() + [[intel::kernel_args_restrict]] { + float in_buffer[kRowLength][kRowLength]; + float temp_buffer[kRowLength][kRowLength]; + + // Initialize local buffers + for (size_t i = 0; i < kMatrixSize; i++) { + in_buffer[i / kRowLength][i % kRowLength] = accessor_input[i]; + temp_buffer[i / kRowLength][i % kRowLength] = 0; + } + + // No iterations of the following loop store data into the same memory + // location that are less than kRowLength iterations apart. 
+ // The ivdep here instructs the compiler that it can safely assume no + // loop-carried dependencies over safe_len consecutive iterations. + [[intelfpga::ivdep(safe_len)]] + for (size_t j = 0; j < kMatrixSize * kRowLength; j++) { + #pragma unroll + for (size_t i = 0; i < kRowLength; i++) { + temp_buffer[j % kRowLength][i] += in_buffer[i][j % kRowLength]; + } + } + + // Write result to output + for (size_t i = 0; i < kMatrixSize; i++) { + accessor_output[i] = temp_buffer[i / kRowLength][i % kRowLength]; + } + }); + }); + + double start = e.get_profiling_info(); + double end = e.get_profiling_info(); + + // unit is nano second, convert to ms + kernel_time = (double)(end - start) * 1e-6; + + } catch (sycl::exception const &e) { + // Catches exceptions in the host code + std::cout << "Caught a SYCL host exception:\n" << e.what() << "\n"; + + // Most likely the runtime couldn't find FPGA hardware! + if (e.get_cl_code() == CL_DEVICE_NOT_FOUND) { + std::cout << "If you are targeting an FPGA, please ensure that your " + "system has a correctly configured FPGA board.\n"; + std::cout << "If you are targeting the FPGA emulator, compile with " + "-DFPGA_EMULATOR.\n"; + } + std::terminate(); + } + + std::cout << "safe_len: " << safe_len << " -- kernel time : " << kernel_time + << " ms\n"; + std::cout << "Throughput for kernel with safe_len " << safe_len << ": "; + std::cout << std::fixed << std::setprecision(0) + << (((double)kMatrixSize * sizeof(float) * 1e-3f) / + (kernel_time * 1e-3f)) << "KB/s\n"; +} + +int main() { + std::array A, B, C; + + // Initialize input with random data + for (size_t i = 0; i < kMatrixSize; i++) { + A[i] = static_cast(rand()) / static_cast(RAND_MAX); + } + +#if defined(FPGA_EMULATOR) + intel::fpga_emulator_selector selector; +#else + intel::fpga_selector selector; +#endif + + // Instantiate kernel logic with the min and max correct safelen parameter + // to compare performance. 
+ TransposeAndFold(selector, A, B); + TransposeAndFold(selector, A, C); + // You can also try removing the ivdep from the kernel entirely and + // recompiling to see what effect this has on performance. + + // Verify result + for (size_t i = 0; i < kMatrixSize; i++) { + if (B[i] != C[i]) { + std::cout << "FAILED: The results are incorrect" << '\n'; + return 1; + } + } + std::cout << "PASSED: The results are correct" << '\n'; + return 0; +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/CMakeLists.txt new file mode 100755 index 0000000000..e281d8cb1a --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/CMakeLists.txt @@ -0,0 +1,11 @@ +set(CMAKE_CXX_COMPILER "dpcpp") + +cmake_minimum_required (VERSION 2.8) + +project(MaxConcurrency) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +add_subdirectory (src) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/License.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/License.txt new file mode 100755 index 0000000000..e63c6e13dc --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/License.txt @@ -0,0 +1,7 @@ +Copyright Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/README.md b/DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/README.md new file mode 100755 index 0000000000..68d681dd31 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/README.md @@ -0,0 +1,172 @@ +# Maximum Concurrency of a Loop +This FPGA tutorial explains how to use the max_concurrency attribute for loops. + +***Documentation***: The [oneAPI DPC++ FPGA Optimization Guide](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide) provides comprehensive instructions for targeting FPGAs through DPC++. The [oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide) is a general resource for target-independent DPC++ programming. + +| Optimized for | Description +--- |--- +| OS | Linux* Ubuntu* 18.04; Windows* 10 +| Hardware | Intel® Programmable Acceleration Card (PAC) with Intel Arria® 10 GX FPGA;
Intel® Programmable Acceleration Card (PAC) with Intel Stratix® 10 SX FPGA +| Software | Intel® oneAPI DPC++ Compiler (Beta)
Intel® FPGA Add-On for oneAPI Base Toolkit +| What you will learn | The basic usage of the `max_concurrency` attribute
How the `max_concurrency` attribute affects loop throughput and resource use
How to apply the `max_concurrency` attribute to loops in your program
How to identify the correct `max_concurrency` factor for your program +| Time to complete | 15 minutes + +_Notice: Limited support in Windows*; compiling for FPGA hardware is not supported in Windows*_ + +## Purpose +This tutorial demonstrates a simple example of applying the `max_concurrency` attribute to a loop in a task kernel to trade off the on-chip memory use and throughput of the loop. + +### Description of the `max_concurrency` Attribute +The `max_concurrency` attribute is a loop attribute that enables you to control the number of simultaneously executed loop iterations. To enable this simultaneous execution, the compiler creates copies of any memory that is private to a single iteration. These copies are called private copies. The greater the permitted concurrency, the more private copies the compiler must create. + +#### Example: + +Kernels in this tutorial design apply `[[intelfpga::max_concurrency(N)]]` to an outer loop that contains two inner loops, which perform a partial sum computation on an input array, storing the results in a private (to the outer loop) array `a1`. The following is an example of a loop nest: + +``` +[[intelfpga::max_concurrency(1)]] +for (size_t i = 0; i < max_iter; i++) { + float a1[size]; + for (int j = 0; j < size; j++) + a1[j] = accessorA[i * 4 + j] * shift; + for (int j = 0; j < size; j++) + result += a1[j]; +} +``` + +In this example, the maximum concurrency allowed for the outer loop is 1, that is, only one iteration of the outer loop is allowed to be simultaneously executing at any given moment. The `max_concurrency` attribute in this example forces the compiler to create exactly one private copy of the array `a1`. Passing the parameter `N` to the `max_concurrency` attribute limits the concurrency of the loop to `N` simultaneous iterations, and `N` private copies of privately-declared arrays in that loop. 
+ +### Identifying the Correct `max_concurrency` Factor +Generally, increasing the maximum concurrency allowed for a loop through the use of the `max_concurrency` attribute increases the throughput of that loop at the cost of increased memory resource use. Additionally, in nearly all cases, there is a point at which increasing the maximum concurrency does not have any further effect on the throughput of the loop, as the maximum exploitable concurrency of that loop has been achieved. + +The correct `max_concurrency` factor for a loop depends on the goals of your design, the criticality of the loop in question, and its impact on the overall throughput of your design. A typical design flow may be to: +1. Experiment with different values of `max_concurrency`. +2. Observe what impact the values have on the overall throughput and memory use of your design. +3. Choose the appropriate value that allows you to achieve your desired throughput and area goals. + +## Key Concepts +* The basic usage of the `max_concurrency` attribute +* How the `max_concurrency` attribute affects loop throughput and resource use +* How to apply the `max_concurrency` attribute to loops in your program +* How to identify the correct `max_concurrency` factor for your program + +## License +This code sample is licensed under MIT license. + +## Building the `max_concurrency` Tutorial + +### Include Files +The included header `dpc_common.hpp` is located at `%ONEAPI_ROOT%\dev-utilities\latest\include` on your development system. + +### Running Samples in DevCloud +If running a sample in the Intel DevCloud, remember that you must specify the compute node (fpga_compile or fpga_runtime) as well as whether to run in batch or interactive mode. For more information see the Intel® oneAPI Base Toolkit Get Started Guide ([https://devcloud.intel.com/oneapi/get-started/base-toolkit/](https://devcloud.intel.com/oneapi/get-started/base-toolkit/)).
+ +When compiling for FPGA hardware, it is recommended to increase the job timeout to 12h. + +### On a Linux* System + +1. Generate the `Makefile` by running `cmake`. + ``` + mkdir build + cd build + ``` + To compile for the Intel® PAC with Intel Arria® 10 GX FPGA, run `cmake` using the command: + ``` + cmake .. + ``` + Alternatively, to compile for the Intel® PAC with Intel Stratix® 10 SX FPGA, run `cmake` using the command: + + ``` + cmake .. -DFPGA_BOARD=intel_s10sx_pac:pac_s10 + ``` + +2. Compile the design through the generated `Makefile`. The following build targets are provided, matching the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + make fpga_emu + ``` + * Generate the optimization report: + ``` + make report + ``` + * Compile for FPGA hardware (longer compile time, targets FPGA device): + ``` + make fpga + ``` +3. (Optional) As the FPGA hardware compile may take several hours to complete, an Intel® PAC with Intel Arria® 10 GX FPGA precompiled binary can be downloaded here. + +### On a Windows* System +Note: `cmake` is not yet supported on Windows. A build.ninja file is provided instead. + +1. Enter the source file directory. + ``` + cd src + ``` + +2. Compile the design. The following build targets are provided, matching the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + ninja fpga_emu + ``` + + * Generate the optimization report: + + ``` + ninja report + ``` + If you are targeting Intel® PAC with Intel Stratix® 10 SX FPGA, instead use: + ``` + ninja report_s10_pac + ``` + * Compiling for FPGA hardware is not yet supported on Windows. + + ### In Third-Party Integrated Development Environments (IDEs) + +You can compile and run this tutorial in the Eclipse* IDE (in Linux*) and the Visual Studio* IDE (in Windows*). 
For instructions, refer to the following link: [Intel® oneAPI DPC++ FPGA Workflows on Third-Party IDEs](https://software.intel.com/en-us/articles/intel-oneapi-dpcpp-fpga-workflow-on-ide) + +## Examining the Reports +Locate `report.html` in the `max_concurrency_report.prj/reports/` or `max_concurrency_s10_pac_report.prj/reports/` directory. Open the report in any of Chrome*, Firefox*, Edge*, or Internet Explorer*. + +On the main report page, scroll down to the section titled "Estimated Resource Usage". Each kernel name ends in the max_concurrency attribute argument used for that kernel, e.g., `kernelCompute1` uses a max_concurrency attribute value of 1. You can verify that the number of RAMs used for each kernel increases with the max_concurrency value used, with the exception of max_concurrency 0, which instructs the compiler to choose a default value. + +## Running the Sample + + 1. Run the sample on the FPGA emulator (the kernel executes on the CPU): + ``` + ./max_concurrency.fpga_emu (Linux) + max_concurrency.fpga_emu.exe (Windows) + ``` +2. Run the sample on the FPGA device: + ``` + ./max_concurrency.fpga (Linux) + ``` + + +### Example of Output +``` +Max concurrency 0 kernel time : 1459.89 ms +Throughput for kernel with max_concurrency 0: 0.561 GFlops +Max concurrency 1 kernel time : 2890.810 ms +Throughput for kernel with max_concurrency 1: 0.283 GFlops +Max concurrency 2 kernel time : 1460.227 ms +Throughput for kernel with max_concurrency 2: 0.561 GFlops +Max concurrency 4 kernel time : 1459.970 ms +Throughput for kernel with max_concurrency 4: 0.561 GFlops +Max concurrency 8 kernel time : 1460.034 ms +Throughput for kernel with max_concurrency 8: 0.561 GFlops +Max concurrency 16 kernel time : 1459.901 ms +Throughput for kernel with max_concurrency 16: 0.561 GFlops +PASSED: The results are correct +``` + +### Discussion of Results + +The stdout output shows the giga-floating point operations per second (GFlops) for each kernel. 
+ +When run on the Intel® PAC with Intel Arria® 10 GX FPGA hardware board, we see that the throughput doubles from using max_concurrency 1 to max_concurrency 2, after which increasing the value of max_concurrency does not increase the GFlops achieved, i.e., increasing the max_concurrency above 2 will spend additional RAM resources for no additional throughput gain. As such, for this tutorial design, maximal throughput is best achieved by using max_concurrency 2. + +Using max_concurrency 0 (or equivalently omitting the attribute entirely) also produced good throughput, indicating that the compiler's default heuristic chose a concurrency of 2 or higher in this case. + +When run on the FPGA emulator, the max_concurrency attribute has no effect on runtime. You may notice that the emulator achieved higher throughput than the FPGA in this example. This is because this trivial example uses only a tiny fraction of the spatial compute resources available on the FPGA. diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/max_concurrency.sln b/DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/max_concurrency.sln new file mode 100755 index 0000000000..761fdc2009 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/max_concurrency.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.28307.705 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "max_concurrency", "max_concurrency.vcxproj", "{F0CE4972-62AF-4B9F-996F-1D1DB14D76B7}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {F0CE4972-62AF-4B9F-996F-1D1DB14D76B7}.Debug|x64.ActiveCfg = Debug|x64 + {F0CE4972-62AF-4B9F-996F-1D1DB14D76B7}.Debug|x64.Build.0 = Debug|x64 + 
{F0CE4972-62AF-4B9F-996F-1D1DB14D76B7}.Release|x64.ActiveCfg = Release|x64 + {F0CE4972-62AF-4B9F-996F-1D1DB14D76B7}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {3247AB7C-282F-4907-B1F4-E944349A8835} + EndGlobalSection +EndGlobal diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/max_concurrency.vcxproj b/DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/max_concurrency.vcxproj new file mode 100755 index 0000000000..49b65a1722 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/max_concurrency.vcxproj @@ -0,0 +1,160 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + + + + + + 15.0 + {f0ce4972-62af-4b9f-996f-1d1db14d76b7} + Win32Proj + max_concurrency + $(WindowsSDKVersion.Replace("\","")) + + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + + + + + + + + + + + + + + + + + + + true + + + true + + + false + + + false + + + + Use + Level3 + Disabled + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + Disabled + true + true + pch.h + true + -DFPGA_EMULATOR %(AdditionalOptions) + $(IntDir)max_concurrency.obj + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + true + -DFPGA_EMULATOR %(AdditionalOptions) + $(IntDir)max_concurrency.obj + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true 
+ true + true + + + + + + diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/sample.json b/DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/sample.json new file mode 100755 index 0000000000..9a9253ef17 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/sample.json @@ -0,0 +1,51 @@ +{ + "guid": "7909FAE1-D3D4-4E97-A963-14A884F33495", + "name": "Maximum Concurrency of a Loop", + "categories": ["Toolkit/Intel® oneAPI Base Toolkit/FPGA/Tutorials"], + "description": "How to use the max_concurrency attribute for single_task loops", + "toolchain": ["dpcpp"], + "os": ["linux", "windows"], + "targetDevice": ["FPGA"], + "builder": ["ide", "cmake"], + "languages": [{"cpp":{}}], + "ciTests": { + "linux": [ + { + "id": "fpga_emu", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make fpga_emu", + "./max_concurrency.fpga_emu" + ] + }, + { + "id": "report", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make report" + ] + } + ], + "windows": [ + { + "id": "fpga_emu", + "steps": [ + "cd src", + "ninja fpga_emu", + "max_concurrency.fpga_emu.exe" + ] + }, + { + "id": "report", + "steps": [ + "cd src", + "ninja report" + ] + } + ] + } +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/src/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/src/CMakeLists.txt new file mode 100755 index 0000000000..24d6d8302a --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/src/CMakeLists.txt @@ -0,0 +1,90 @@ +set(SOURCE_FILE max_concurrency.cpp) +set(TARGET_NAME max_concurrency) + +set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu) +set(FPGA_TARGET ${TARGET_NAME}.fpga) + +set(AOC_SEED_FLAG "-Xsseed=4 -Xsparallel=2") +set(HARDWARE_COMPILE_FLAGS "-fintelfpga") + +# Intel supported FPGA Boards and their names +set(A10_PAC_BOARD_NAME "intel_a10gx_pac:pac_a10") +set(S10_PAC_BOARD_NAME "intel_s10sx_pac:pac_s10") + +# Assume 
target is the Intel(R) PAC with Intel Arria(R) 10 GX FPGA +SET(_FPGA_BOARD ${A10_PAC_BOARD_NAME}) + +# Check if target is the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA +IF (NOT DEFINED FPGA_BOARD) + MESSAGE(STATUS "\tFPGA_BOARD was not specified. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. Please refer to the README for more information on how to run the design on the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${A10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${S10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Stratix(R) 10 SX FPGA.") + SET(_FPGA_BOARD ${S10_PAC_BOARD_NAME}) + +ELSE() + MESSAGE(STATUS "\tAn invalid board name was passed in using the FPGA_BOARD flag. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. 
Please refer to the README for the list of valid board names.") +ENDIF() + +# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation +set(HARDWARE_LINK_FLAGS "-fintelfpga -Xshardware -Xsboard=${_FPGA_BOARD} ${AOC_SEED_FLAG} ${USER_HARDWARE_FLAGS}") + +set(EMULATOR_COMPILE_FLAGS "-fintelfpga -DFPGA_EMULATOR") +set(EMULATOR_LINK_FLAGS "-fintelfpga") + +# fpga emulator +if(WIN32) + set(WIN_EMULATOR_TARGET ${EMULATOR_TARGET}.exe) + add_custom_target(fpga_emu DEPENDS ${WIN_EMULATOR_TARGET}) + separate_arguments(WIN_EMULATOR_COMPILE_FLAGS WINDOWS_COMMAND "${EMULATOR_COMPILE_FLAGS}") + add_custom_command(OUTPUT ${WIN_EMULATOR_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} ${WIN_EMULATOR_COMPILE_FLAGS} /GX ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${WIN_EMULATOR_TARGET} + DEPENDS ${SOURCE_FILE}) +else() + add_executable(${EMULATOR_TARGET} ${SOURCE_FILE}) + add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS ${EMULATOR_COMPILE_FLAGS}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS ${EMULATOR_LINK_FLAGS}) +endif() + + +# fpga +if(WIN32) + add_custom_target(fpga + COMMAND echo "FPGA hardware flow is not supported in Windows") +else() + add_executable(${FPGA_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILE}) + add_custom_target(fpga DEPENDS ${FPGA_TARGET}) + set_target_properties(${FPGA_TARGET} PROPERTIES COMPILE_FLAGS ${HARDWARE_COMPILE_FLAGS}) + set_target_properties(${FPGA_TARGET} PROPERTIES LINK_FLAGS ${HARDWARE_LINK_FLAGS}) +endif() + +# generate report +if(WIN32) + set(DEVICE_OBJ_FILE ${TARGET_NAME}_report.a) + add_custom_target(report DEPENDS ${DEVICE_OBJ_FILE}) + + separate_arguments(HARDWARE_LINK_FLAGS_LIST WINDOWS_COMMAND "${HARDWARE_LINK_FLAGS}") + add_custom_command(OUTPUT ${DEVICE_OBJ_FILE} + COMMAND ${CMAKE_CXX_COMPILER} /EHsc ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} -o 
${CMAKE_BINARY_DIR}/${DEVICE_OBJ_FILE} + DEPENDS ${SOURCE_FILE}) + +else() + set(DEVICE_OBJ_FILE ${TARGET_NAME}_report.a) + add_custom_target(report DEPENDS ${DEVICE_OBJ_FILE}) + + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} ${SOURCE_FILE} COPYONLY) + + separate_arguments(HARDWARE_LINK_FLAGS_LIST UNIX_COMMAND "${HARDWARE_LINK_FLAGS}") + add_custom_command(OUTPUT ${DEVICE_OBJ_FILE} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link ${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${DEVICE_OBJ_FILE} + DEPENDS ${SOURCE_FILE}) +endif() + +# run +add_custom_target(run + COMMAND ../${TARGET_NAME}.fpga_emu + DEPENDS ${TARGET_NAME}.fpga_emu) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/src/build.ninja b/DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/src/build.ninja new file mode 100755 index 0000000000..b3a66f686b --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/src/build.ninja @@ -0,0 +1,30 @@ +source_file = max_concurrency.cpp +target_name = max_concurrency + +emulator_target = ${target_name}.fpga_emu.exe +report_target = ${target_name}_report.a +report_target_s10_pac = ${target_name}_s10_pac_report.a + +hardware_flags = -fintelfpga -Xshardware -Xsseed=3 +emulator_flags = -fintelfpga -DFPGA_EMULATOR + +rule build_fpga_emu + command = dpcpp /GX ${emulator_flags} $in -o $out + +rule gen_report + command = dpcpp /GX ${hardware_flags} -Xsboard=intel_a10gx_pac:pac_a10 -fsycl-link $in -o $out + +rule gen_report_s10_pac + command = dpcpp /GX ${hardware_flags} -Xsboard=intel_s10sx_pac:pac_s10 -fsycl-link $in -o $out + +# FPGA emulator +build fpga_emu: phony ${emulator_target} +build ${emulator_target}: build_fpga_emu ${source_file} + +# report +build report: phony ${report_target} +build ${report_target}: gen_report ${source_file} + +# report (S10 PAC) +build report_s10_pac: phony ${report_target_s10_pac} +build ${report_target_s10_pac}: gen_report_s10_pac 
${source_file} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/src/max_concurrency.cpp b/DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/src/max_concurrency.cpp new file mode 100755 index 0000000000..cec706dd17 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/max_concurrency/src/max_concurrency.cpp @@ -0,0 +1,187 @@ +//============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +#include +#include +#include +#include +#include +#include "dpc_common.hpp" + +using namespace sycl; + +constexpr size_t kSize = 8192; +constexpr size_t kMaxIter = 50000; +constexpr size_t kTotalOps = 2 * kMaxIter * kSize; +constexpr size_t kMaxValue = 128; + +using FloatArray = std::array; +using FloatScalar = std::array; + +template class Compute; + +// Launch a kernel on the device specified by selector. +// The kernel's functionality is designed to show the +// performance impact of the max_concurrency attribute. +template +void PartialSumWithShift(const device_selector &selector, + const FloatArray &array, float shift, + FloatScalar &result) { + double kernel_time = 0.0; + + try { + + queue q(selector, dpc_common::exception_handler, + property::queue::enable_profiling{}); + + buffer buffer_array(array); + buffer buffer_result(result.data(), 1); + + event e = q.submit([&](handler &h) { + auto accessor_array = buffer_array.get_access(h); + auto accessor_result = buffer_result.get_access(h); + + h.single_task>([=]() + [[intel::kernel_args_restrict]] { + float r = 0; + + // At most concurrency iterations of the outer loop will be + // active at one time. + // This limits memory usage, since each iteration of the outer + // loop requires its own copy of a1. 
+ [[intelfpga::max_concurrency(concurrency)]] + for (size_t i = 0; i < kMaxIter; i++) { + float a1[kSize]; + for (size_t j = 0; j < kSize; j++) + a1[j] = accessor_array[(i * 4 + j) % kSize] * shift; + for (size_t j = 0; j < kSize; j++) + r += a1[j]; + } + accessor_result[0] = r; + }); + }); + + // SYCL event profiling allows the kernel execution to be timed + double start = e.get_profiling_info(); + double end = e.get_profiling_info(); + kernel_time = (double)(end - start) * 1e-6; + + } catch (sycl::exception const &e) { + // Catches exceptions in the host code + std::cout << "Caught a SYCL host exception:\n" << e.what() << "\n"; + + // Most likely the runtime couldn't find FPGA hardware! + if (e.get_cl_code() == CL_DEVICE_NOT_FOUND) { + std::cout << "If you are targeting an FPGA, please ensure that your " + "system has a correctly configured FPGA board.\n"; + std::cout << "If you are targeting the FPGA emulator, compile with " + "-DFPGA_EMULATOR.\n"; + } + std::terminate(); + } + + // The performance of the kernel is measured in GFlops, based on: + // 1) the number of floating-point operations performed by the kernel. + // This can be calculated easily for the simple example kernel. + // 2) the kernel execution time reported by SYCL event profiling. + std::cout << "Max concurrency " << concurrency << " " + << "kernel time : " << kernel_time << " ms\n"; + std::cout << "Throughput for kernel with max_concurrency " << concurrency + << ": "; + std::cout << std::fixed << std::setprecision(3) + << ((double)(kTotalOps) / kernel_time) / 1e6f << " GFlops\n"; +} + +// Calculates the expected results. Used to verify that the kernel +// is functionally correct. 
+float GoldenResult(const FloatArray &A, float shift) { + float gr = 0; + for (size_t i = 0; i < kMaxIter; i++) { + float a1[kSize]; + for (size_t j = 0; j < kSize; j++) + a1[j] = A[(i * 4 + j) % kSize] * shift; + for (size_t j = 0; j < kSize; j++) + gr += a1[j]; + } + return gr; +} + +int main() { + bool success = true; + + FloatArray A; + FloatScalar R0, R1, R2, R3, R4, R5; + + float shift = (float)(rand() % kMaxValue); + + // initialize the input data + for (size_t i = 0; i < kSize; i++) + A[i] = rand() % kMaxValue; + +#if defined(FPGA_EMULATOR) + intel::fpga_emulator_selector selector; +#else + intel::fpga_selector selector; +#endif + + // Run the kernel with different values of the max_concurrency + // attribute, to determine the optimal concurrency. + // In this case, the optimal max_concurrency is 2 since this + // achieves the highest GFlops. Higher values of max_concurrency + // consume additional RAM without increasing GFlops. + PartialSumWithShift<0>(selector, A, shift, R0); + PartialSumWithShift<1>(selector, A, shift, R1); + PartialSumWithShift<2>(selector, A, shift, R2); + PartialSumWithShift<4>(selector, A, shift, R3); + PartialSumWithShift<8>(selector, A, shift, R4); + PartialSumWithShift<16>(selector, A, shift, R5); + + // compute the actual result here + float gr = GoldenResult(A, shift); + + // verify the results are correct + if (gr != R0[0]) { + std::cout << "Max Concurrency 0: mismatch: " << R0[0] << " != " << gr + << " (kernel != expected)" << '\n'; + success = false; + } + + if (gr != R1[0]) { + std::cout << "Max Concurrency 1: mismatch: " << R1[0] << " != " << gr + << " (kernel != expected)" << '\n'; + success = false; + } + + if (gr != R2[0]) { + std::cout << "Max Concurrency 2: mismatch: " << R2[0] << " != " << gr + << " (kernel != expected)" << '\n'; + success = false; + } + + if (gr != R3[0]) { + std::cout << "Max Concurrency 4: mismatch: " << R3[0] << " != " << gr + << " (kernel != expected)" << '\n'; + success = false; + } + + if (gr 
!= R4[0]) { + std::cout << "Max Concurrency 8: mismatch: " << R4[0] << " != " << gr + << " (kernel != expected)" << '\n'; + success = false; + } + + if (gr != R5[0]) { + std::cout << "Max Concurrency 16: mismatch: " << R5[0] << " != " << gr + << " (kernel != expected)" << '\n'; + success = false; + } + + if (success) { + std::cout << "PASSED: The results are correct\n"; + return 0; + } + + return 1; +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/CMakeLists.txt new file mode 100755 index 0000000000..125d32c072 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/CMakeLists.txt @@ -0,0 +1,11 @@ +set(CMAKE_CXX_COMPILER "dpcpp") + +cmake_minimum_required (VERSION 2.8) + +project(MemoryAttributesOverview) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +add_subdirectory (src) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/License.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/License.txt new file mode 100755 index 0000000000..e63c6e13dc --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/License.txt @@ -0,0 +1,7 @@ +Copyright Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/README.md b/DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/README.md new file mode 100755 index 0000000000..1884982c77 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/README.md @@ -0,0 +1,277 @@ + +# On-Chip Memory Attributes +This FPGA tutorial demonstrates how to use on-chip memory attributes to control memory structures in your DPC++ program. + +***Documentation***: The [oneAPI DPC++ FPGA Optimization Guide](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide) provides comprehensive instructions for targeting FPGAs through DPC++. The [oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide) is a general resource for target-independent DPC++ programming. + +| Optimized for | Description +--- |--- +| OS | Linux* Ubuntu* 18.04; Windows* 10 +| Hardware | Intel® Programmable Acceleration Card (PAC) with Intel Arria® 10 GX FPGA;
Intel® Programmable Acceleration Card (PAC) with Intel Stratix® 10 SX FPGA +| Software | Intel® oneAPI DPC++ Compiler (Beta)
Intel® FPGA Add-On for oneAPI Base Toolkit +| What you will learn | The basic concepts of on-chip memory attributes
How to apply memory attributes in your program
How to confirm that the memory attributes were respected by the compiler
A case study of the type of performance/area trade-offs enabled by memory attributes +| Time to complete | 30 minutes + +_Notice: Limited support in Windows*; compiling for FPGA hardware is not supported in Windows*_ + +## Purpose +For each private or local array in your DPC++ FPGA device code, the Intel® oneAPI DPC++ Compiler creates a custom memory system in your program's datapath to contain the contents of that array. The compiler has many options to choose from when architecting this on-chip memory structure. Memory attributes are a set of DPC++ extensions for FPGA that enable you to override the compiler's internal heuristics and to control the architecture of kernel memory. + +### Introduction to Memory Attributes + +To maximize kernel throughput, your design's datapath should have stall-free accesses to all of its memory systems. A memory read or write is said to be *stall-free* if the compiler can prove that it has contention-free access to a memory port. A memory system is stall-free if all of its accesses have this property. Wherever possible, the compiler will try to create a minimum-area, stall-free memory system. + +If a different area performance trade-off is desired, or if the compiler fails to find the best configuration, you can use memory attributes to override the compiler’s decisions and specify the memory configuration you need. + +Memory attributes can be applied to any variable or array defined within the kernel and to struct data members in struct declarations. The compiler supports the following memory attributes: + +| Memory Attribute | Description +--- |--- +| intelfpga::register | Forces a variable or array to be carried through the pipeline in registers. +| intelfpga::memory("`impl_type`") | Forces a variable or array to be implemented as embedded memory. The optional string parameter `impl_type` can be `BLOCK_RAM` or `MLAB`. 
+| intelfpga::numbanks(N) | Specifies that the memory implementing the variable or array must have N memory banks. +| intelfpga::bankwidth(W) | Specifies that the memory implementing the variable or array must be W bytes wide. +| intelfpga::singlepump | Specifies that the memory implementing the variable or array should be clocked at the same rate as the accesses to it. +| intelfpga::doublepump | Specifies that the memory implementing the variable or array should be clocked at twice the rate as the accesses to it. +| intelfpga::max_replicates(N) | Specifies that a maximum of N replicates should be created to enable simultaneous reads from the datapath. +| intelfpga::private_copies(N) | Specifies that a maximum of N private copies should be created to enable concurrent execution of N pipelined threads. +| intelfpga::simple_dual_port | Specifies that the memory implementing the variable or array should have no port that services both reads and writes. +| intelfpga::merge("`key`", "`type`") | Merge two or more variables or arrays in the same scope width-wise or depth-wise. All variables with the same `key` string are merged into the same memory system. The string `type` can be either `width` or `depth`. +| intelfpga::bank_bits(b0,b1,...,bn) | Specifies that the local memory addresses should use bits (b0,b1,...,bn) for bank-selection, where (b0,b1,...,bn) are indicated in terms of word-addressing. The bits of the local memory address not included in (b0,b1,...,bn) will be used for word-selection in each bank. + + +#### Example 1: Applying memory attributes to private arrays +```c++ +q.submit([&](handler &h) { + h.single_task([=]() { + // Create a kernel memory 8 bytes wide (2 integers per memory word) + // and split the contents into 2 banks (each bank will contain 32 + // integers in 16 memory words). + [[intelfpga::bankwidth(8), intelfpga::numbanks(2)]] int a[64]; + + // Force array 'b' to be carried live in the data path using + // registers. 
+ [[intelfpga::register]] int b[64]; + + // Merge 'mem_A' and 'mem_B' width-wise so that they are mapped + // to the same kernel memory system, + [[intelfpga::merge("mem", "width")]] unsigned short mem_A[64]; + [[intelfpga::merge("mem", "width")]] unsigned short mem_B[64]; + + // ... + }); +}); + +``` + +#### Example 2: Applying memory attributes to struct data members +```c++ +// Memory attributes can be specified for struct data members +// within the struct declaration. +struct State { + [[intelfpga::numbanks(2)]] int mem[64]; + [[intelfpga::register]] int reg[8]; +}; + +q.submit([&](handler &h) { + h.single_task([=]() { + // The compiler will create two memory systems from S1: + // - S1.mem[64] implemented in kernel memory that has 2 banks + // - S1.reg[8] implemented in registers + State S1; + + // In this case, we have attributes on struct declaration as + // well as struct instantiation. When this happens, the outer + // level attribute takes precedence. Here, the compiler will + // generate a single memory system for S2 which will have 4 + // banks. + [[intelfpga::numbanks(4)]] State S2; + + // ... + }); +}); + +``` + +### Tutorial Code Overview +This tutorial demonstrates the trade-offs between choosing a single-pumped and double-pumped memory system for your kernel. We will apply the attributes `[[intelfpga::singlepump]]` and `[[intelfpga::doublepump]]` to the two-dimensional array `dict_offset`. + +The tutorial enqueues three versions of the same kernel: +* `dict_offset` is single-pumped +* `dict_offset` is double-pumped +* `dict_offset` unconstrained (compiler heuristics choose the memory configuration) + +For both single-pumped and double-pumped versions, additional memory attributes direct the compiler to implement `dict_offset` in MLABs (as the size of the array is small), to use `kVec` banks, and to confine the number of replicates in each bank to no more than `kVec`. 
+ +### Accesses to `dict_offset` + +Array `dict_offset` has the following accesses: + + * **Initialization**: It is initialized by copying the contents of global memory `dict_offset_init` using `kVec` writes. + * **Reads** : It is read from `kVec*kVec` times. + * **Writes**: There are `kVec` writes updating the values at some indices. + +After all loops are unrolled, the innermost dimension of every access is known at compile time (e.g. `dict_offset[i][k]` becomes `dict_offset[i][0]`, `dict_offset[i][1]`, etc.). + +### Banks and replicates of `dict_offset` + +If we partition the memory system such that array elements `dict_offset[:][0]` (where `:` denotes all indices in range) are contained in Bank 0, `dict_offset[:][1]` are contained in Bank 1, and so on, each access is confined to a single bank. This partitioning is achieved by requesting the compiler to generate `kVec` banks. + +In total, there are `kVec` reads from each bank. To make these reads stall-free, we request `kVec` replicates per bank so that (if needed) each read can occur simultaneously from a separate replicate. Since all replicates in a bank must contain identical data, a write to a bank must go to all replicates. + +For single-pumped memories, each replicate has 2 physical ports. In the tutorial code, one of these ports is used for writing and one for reading. The compiler must generate `kVec` replicates per bank to create stall-free accesses for `kVec` reads. + +For double-pumped memories, each replicate effectively has 4 ports, three of which are available for reads. Hence, the compiler needs fewer replicates per bank to create stall-free reads. However, this can incur a system fMAX penalty. + +The choice of attributes will be further discussed in the [Examining the Reports](#examining-the-reports) section. 
+ + +## Key Concepts +* The basic concepts of on-chip memory attributes +* How to apply memory attributes in your program +* How to confirm that the memory attributes were respected by the compiler +* A case study of the type of performance/area trade-offs enabled by memory attributes + +## License +This code sample is licensed under MIT license. + +## Building the `memory_attributes` Tutorial + +### Include Files +The included header `dpc_common.hpp` is located at `%ONEAPI_ROOT%\dev-utilities\latest\include` on your development system. + +### Running Samples in DevCloud +If running a sample in the Intel DevCloud, remember that you must specify the compute node (fpga_compile or fpga_runtime) as well as whether to run in batch or interactive mode. For more information see the Intel® oneAPI Base Toolkit Get Started Guide ([https://devcloud.intel.com/oneapi/get-started/base-toolkit/](https://devcloud.intel.com/oneapi/get-started/base-toolkit/)). + +When compiling for FPGA hardware, it is recommended to increase the job timeout to 12h. + +### On a Linux* System + +1. Generate the `Makefile` by running `cmake`. + ``` + mkdir build + cd build + ``` + To compile for the Intel® PAC with Intel Arria® 10 GX FPGA, run `cmake` using the command: + ``` + cmake .. + ``` + Alternatively, to compile for the Intel® PAC with Intel Stratix® 10 SX FPGA, run `cmake` using the command: + + ``` + cmake .. -DFPGA_BOARD=intel_s10sx_pac:pac_s10 + ``` + +2. Compile the design through the generated `Makefile`. The following build targets are provided, matching the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + make fpga_emu + ``` + * Generate the optimization report: + ``` + make report + ``` + * Compile for FPGA hardware (longer compile time, targets FPGA device): + ``` + make fpga + ``` +3. 
(Optional) As the above hardware compile may take several hours to complete, an Intel® PAC with Intel Arria® 10 GX FPGA precompiled binary can be downloaded here. + +### On a Windows* System +Note: `cmake` is not yet supported on Windows. A build.ninja file is provided instead. + +1. Enter the source file directory. + ``` + cd src + ``` + +2. Compile the design. The following build targets are provided, matching the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + ninja fpga_emu + ``` + + * Generate the optimization report: + + ``` + ninja report + ``` + If you are targeting Intel® PAC with Intel Stratix® 10 SX FPGA, instead use: + ``` + ninja report_s10_pac + ``` + * Compiling for FPGA hardware is not yet supported on Windows. + + ### In Third-Party Integrated Development Environments (IDEs) + +You can compile and run this tutorial in the Eclipse* IDE (in Linux*) and the Visual Studio* IDE (in Windows*). For instructions, refer to the following link: [Intel® oneAPI DPC++ FPGA Workflows on Third-Party IDEs](https://software.intel.com/en-us/articles/intel-oneapi-dpcpp-fpga-workflow-on-ide) + +## Examining the Reports +Locate `report.html` in the `memory_attributes_report.prj/reports/` or `memory_attributes_s10_pac_report.prj/reports/` directory. Open the report in any of Chrome*, Firefox*, Edge*, or Internet Explorer*. + +Navigate to the Kernel Memory Viewer (System Viewers > Kernel Memory Viewer). In the Kernel Memory List pane, click on `dict_offset` under the function `KernelCompute`, for each of +* N=0 : unconstrained configuration (compiler's choice) +* N=1 : single-pumped configuration +* N=2 : double-pumped configuration + +This view provides information about the memory configuration. The user-specified memory attributes are listed in the "Details" pane. 
+ +### Comparing the memory configurations + +For both single-pumped and double-pumped versions of the kernel, the compiler generates `kVec` banks and implements the memory in MLABs, as was requested through memory attributes. The main difference between these two memory systems is the number of replicates within each bank. To see the number of replicates per bank, click any bank label (say Bank 0) under `dict_offset`. + +For the single-pumped memory system, the compiler created 4 replicates per bank, whereas for the double-pumped memory system, the compiler created 2 replicates per bank. A single-pumped replicate has 2 physical ports and a double-pumped replicate has 4 (effective) physical ports. For this reason, the compiler required twice as many replicates to create a stall-free system in the single-pumped version as compared to the double-pumped version. + +### Area implications + +This also means that the FPGA resources needed to generate the stall-free memory systems differ between the two versions. In the report, navigate to the Area Analysis of System view (Area Analysis > Area Analysis of System) and click "Expand All". For the single-pumped version, you can see that the compiler used 32 MLABs to implement the memory system for `dict_offset`, whereas for the double-pumped version, the compiler used only 16 MLABs. However, the double-pumped version of the memory required additional ALUTs and FFs to implement the double-pumping logic. + +In general, double-pumped memories are more area-efficient than single-pumped memories. + +### fMAX implications + +The use of double-pumped memories can impact the fMAX of your system. Double-pumped memories have to be clocked at twice the frequency of the rest of the datapath, and the resulting cross-clock domain transfer can reduce fMAX. The effect is particularly pronounced when double-pumping MLABs. 
+ +In this tutorial, both the single-pumped and double-pumped versions of the kernel share a single clock domain, so the difference in fMAX cannot be directly observed in the report. + +If you want to observe the fMAX effect, modify the code to enqueue only the single-pumped (or only the double-pumped) version of the kernel. Only the report generated from a full FPGA compile (`make fpga`) will provide fMAX information. + +The table that follows summarizes the fMAX achieved when compiling single-kernel variants of the tutorial design to an Intel® PAC with Intel® Arria® 10 GX FPGA. + +Variant | fMAX (MHz) | \# MLABs in `dict_offset` +------------- | ------------- | -------- +Single-pumped | 307.9 | 32 +Double-pumped | 200.0 | 16 + +Note that the numbers reported in the table will vary slightly from compile to compile. + +### Trade-offs +There are often many ways to generate a stall-free memory system. As a programmer, the implementation you choose depends on your design constraints. + + - If your design is limited by the available memory resources (block RAMs and MLABs), using double-pumped memory systems can help your design fit in the FPGA device. + - If the fMAX of your design is limited by double-pumped memory systems in your kernel, forcing all memory systems to be single-pumped might increase the fMAX. + +## Running the Sample + + 1. Run the sample on the FPGA emulator (the kernel executes on the CPU): + ``` + ./memory_attributes.fpga_emu (Linux) + memory_attributes.fpga_emu.exe (Windows) + ``` +2. Run the sample on the FPGA device: + ``` + ./memory_attributes.fpga (Linux) + ``` + +### Example of Output +``` +PASSED: all kernel results are correct. +``` + +### Discussion + +Feel free to experiment further with the tutorial code. You can: + - Change the memory implementation type to block RAMs (using `[[intelfpga::memory("BLOCK_RAM")]]`) or registers (using `[[intelfpga::register]]`) to see how it affects the area and fMAX of the tutorial design. 
+ - Vary `kRows` and/or `kVec` (both in powers of 2) to see how it affects the trade-off between single-pumped and double-pumped memories. diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/memory_attributes.sln b/DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/memory_attributes.sln new file mode 100755 index 0000000000..3f1de9b8b8 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/memory_attributes.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.28307.705 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "memory_attributes", "memory_attributes.vcxproj", "{D6A634E7-9F2B-46C2-A21C-2402F631A55A}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {D6A634E7-9F2B-46C2-A21C-2402F631A55A}.Debug|x64.ActiveCfg = Debug|x64 + {D6A634E7-9F2B-46C2-A21C-2402F631A55A}.Debug|x64.Build.0 = Debug|x64 + {D6A634E7-9F2B-46C2-A21C-2402F631A55A}.Release|x64.ActiveCfg = Release|x64 + {D6A634E7-9F2B-46C2-A21C-2402F631A55A}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {4AC13DD2-5B0F-4051-93BF-85AEAF6E50C9} + EndGlobalSection +EndGlobal diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/memory_attributes.vcxproj b/DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/memory_attributes.vcxproj new file mode 100755 index 0000000000..f797c91ef5 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/memory_attributes.vcxproj @@ -0,0 +1,160 @@ + + + + + Debug + x64 + + + Release + x64 + + + 
+ + + + + + + 15.0 + {d6a634e7-9f2b-46c2-a21c-2402f631a55a} + Win32Proj + memory_attributes + $(WindowsSDKVersion.Replace("\","")) + + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + + + + + + + + + + + + + + + + + + + true + + + true + + + false + + + false + + + + Use + Level3 + Disabled + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + Disabled + true + true + pch.h + true + -DFPGA_EMULATOR %(AdditionalOptions) + $(IntDir)memory_attributes.obj + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + true + -DFPGA_EMULATOR %(AdditionalOptions) + $(IntDir)memory_attributes.obj + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + + diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/sample.json b/DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/sample.json new file mode 100755 index 0000000000..8c18593331 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/sample.json @@ -0,0 +1,51 @@ +{ + "guid": "31BCA673-F514-4E2E-A8B3-A0B42D63884C", + "name": "On-Chip Memory Attributes", + "categories": ["Toolkit/Intel® oneAPI Base Toolkit/FPGA/Tutorials"], + "description": "FPGA tutorial demonstrating the use of memory attributes to control memory structures in a DPC++ program.", + "toolchain": ["dpcpp"], + "os": ["linux", "windows"], + "targetDevice": ["FPGA"], + "builder": ["ide", "cmake"], + "languages": [{"cpp":{}}], + "ciTests": { + 
"linux": [ + { + "id": "fpga_emu", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make fpga_emu", + "./memory_attributes.fpga_emu" + ] + }, + { + "id": "report", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make report" + ] + } + ], + "windows": [ + { + "id": "fpga_emu", + "steps": [ + "cd src", + "ninja fpga_emu", + "memory_attributes.fpga_emu.exe" + ] + }, + { + "id": "report", + "steps": [ + "cd src", + "ninja report" + ] + } + ] + } +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/src/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/src/CMakeLists.txt new file mode 100755 index 0000000000..290fd004e5 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/src/CMakeLists.txt @@ -0,0 +1,96 @@ +set(SOURCE_FILE memory_attributes.cpp) +set(TARGET_NAME memory_attributes) +set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu) +set(FPGA_TARGET ${TARGET_NAME}.fpga) + +# Intel supported FPGA Boards and their names +set(A10_PAC_BOARD_NAME "intel_a10gx_pac:pac_a10") +set(S10_PAC_BOARD_NAME "intel_s10sx_pac:pac_s10") + +# Assume target is the Intel(R) PAC with Intel Arria(R) 10 GX FPGA +SET(_FPGA_BOARD ${A10_PAC_BOARD_NAME}) + +# Check if target is the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA +IF (NOT DEFINED FPGA_BOARD) + MESSAGE(STATUS "\tFPGA_BOARD was not specified. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. 
Please refer to the README for more information on how to run the design on the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${A10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${S10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Stratix(R) 10 SX FPGA.") + SET(_FPGA_BOARD ${S10_PAC_BOARD_NAME}) + +ELSE() + MESSAGE(STATUS "\tAn invalid board name was passed in using the FPGA_BOARD flag. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. Please refer to the README for the list of valid board names.") +ENDIF() + +set(HARDWARE_COMPILE_FLAGS "-fintelfpga") + +# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation +set(HARDWARE_LINK_FLAGS "-fintelfpga -Xshardware -Xsboard=${_FPGA_BOARD} ${USER_HARDWARE_FLAGS}") + +set(EMULATOR_COMPILE_FLAGS "-fintelfpga -DFPGA_EMULATOR") +set(EMULATOR_LINK_FLAGS "-fintelfpga") + +# fpga emulator +if(WIN32) + set(WIN_EMULATOR_TARGET ${EMULATOR_TARGET}.exe) + add_custom_target(fpga_emu DEPENDS ${WIN_EMULATOR_TARGET}) + separate_arguments(WIN_EMULATOR_COMPILE_FLAGS WINDOWS_COMMAND "${EMULATOR_COMPILE_FLAGS}") + add_custom_command(OUTPUT ${WIN_EMULATOR_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} ${WIN_EMULATOR_COMPILE_FLAGS} /GX ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${WIN_EMULATOR_TARGET} + DEPENDS ${SOURCE_FILE}) + +else() + add_executable(${EMULATOR_TARGET} ${SOURCE_FILE}) + add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS ${EMULATOR_COMPILE_FLAGS}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS ${EMULATOR_LINK_FLAGS}) +endif() + +# fpga +if(WIN32) + add_custom_target(fpga + COMMAND 
echo "FPGA hardware flow is not supported in Windows") +else() + add_executable(${FPGA_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILE}) + add_custom_target(fpga DEPENDS ${FPGA_TARGET}) + + set_target_properties(${FPGA_TARGET} PROPERTIES COMPILE_FLAGS "${HARDWARE_COMPILE_FLAGS}") + set_target_properties(${FPGA_TARGET} PROPERTIES LINK_FLAGS ${HARDWARE_LINK_FLAGS}) + +endif() + +# report + +if(WIN32) + set(REPORT ${TARGET_NAME}_report.a) + + add_custom_target(report DEPENDS ${REPORT}) + + separate_arguments(HARDWARE_LINK_FLAGS_LIST WINDOWS_COMMAND "${HARDWARE_LINK_FLAGS}") + + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} ${CMAKE_BINARY_DIR}/${TARGET_NAME}/${SOURCE_FILE} COPYONLY) + + add_custom_command(OUTPUT ${REPORT} + COMMAND ${CMAKE_CXX_COMPILER} /EHsc ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link ${CMAKE_BINARY_DIR}/${TARGET_NAME}/${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${REPORT} + DEPENDS ${SOURCE_FILE}) + +else() + set(REPORT ${TARGET_NAME}_report.a) + + add_custom_target(report DEPENDS ${REPORT}) + + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} ${SOURCE_FILE} COPYONLY) + + separate_arguments(HARDWARE_LINK_FLAGS_LIST UNIX_COMMAND "${HARDWARE_LINK_FLAGS}") + add_custom_command(OUTPUT ${REPORT} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link ${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${REPORT} + DEPENDS ${SOURCE_FILE}) +endif() + +# run +add_custom_target(run + COMMAND ../${TARGET_NAME}.fpga_emu + DEPENDS ${TARGET_NAME}.fpga_emu) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/src/build.ninja b/DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/src/build.ninja new file mode 100755 index 0000000000..5a8b871482 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/src/build.ninja @@ -0,0 +1,41 @@ +source_file = memory_attributes.cpp +target_name = memory_attributes + +emulator_target = ${target_name}.fpga_emu.exe + 
+hardware_flags = -fintelfpga -Xshardware +emulator_flags = -fintelfpga -DFPGA_EMULATOR + +rule build_fpga_emu + command = dpcpp /GX ${emulator_flags} $in -o $out + +rule gen_report_1x + command = dpcpp /GX ${hardware_flags} -Xsboard=intel_a10gx_pac:pac_a10 -fsycl-link $in -DSINGLEPUMP -o $out + +rule gen_report_2x + command = dpcpp /GX ${hardware_flags} -Xsboard=intel_a10gx_pac:pac_a10 -fsycl-link $in -DDOUBLEPUMP -o $out + +rule gen_report_1x_s10_pac + command = dpcpp /GX ${hardware_flags} -Xsboard=intel_s10sx_pac:pac_s10 -fsycl-link $in -DSINGLEPUMP -o $out + +rule gen_report_2x_s10_pac + command = dpcpp /GX ${hardware_flags} -Xsboard=intel_s10sx_pac:pac_s10 -fsycl-link $in -DDOUBLEPUMP -o $out + +# FPGA emulator +build fpga_emu: phony ${emulator_target} +build ${emulator_target}: build_fpga_emu ${source_file} + +# report +report_target_1x = singlepump_report.a +report_target_2x = doublepump_report.a +report_target_1x_s10_pac = singlepump_s10_pac_report.a +report_target_2x_s10_pac = doublepump_s10_pac_report.a + +build report: phony ${report_target_1x} ${report_target_2x} +build ${report_target_1x}: gen_report_1x ${source_file} +build ${report_target_2x}: gen_report_2x ${source_file} + +# report (S10 PAC) +build report_s10_pac: phony ${report_target_1x_s10_pac} ${report_target_2x_s10_pac} +build ${report_target_1x_s10_pac}: gen_report_1x_s10_pac ${source_file} +build ${report_target_2x_s10_pac}: gen_report_2x_s10_pac ${source_file} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/src/memory_attributes.cpp b/DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/src/memory_attributes.cpp new file mode 100755 index 0000000000..f1fa9afb3a --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/memory_attributes/src/memory_attributes.cpp @@ -0,0 +1,227 @@ +//============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// 
============================================================= +#include +#include +#include "dpc_common.hpp" + +using namespace sycl; + +constexpr size_t kRows = 8; +constexpr size_t kVec = 4; +constexpr size_t kMaxVal = 512; +constexpr size_t kNumTests = 64; +constexpr size_t kMaxIter = 8; + +// Forward declaration of the kernel name +// (This will become unnecessary in a future compiler version.) +template +class KernelCompute; + +using UintArray = std::array; +using Uint2DArray = std::array, kRows>; +using UintSQArray = std::array, kVec>; // square + +// The shared compute function for host and device code +size_t Compute(unsigned init, Uint2DArray &dict_offset) { + + // We do not provide any attributes for compare_offset and hash; + // we let the compiler decide what's best based on the access pattern + // and their size. + UintSQArray compare_offset; + UintArray hash; + + #pragma unroll + for (size_t i = 0; i < kVec; i++) { + hash[i] = (++init) & (kRows - 1); + } + + size_t count = 0, iter = 0; + do { + // After unrolling both loops, we have kVec*kVec reads from dict_offset + #pragma unroll + for (size_t i = 0; i < kVec; i++) { + #pragma unroll + for (size_t k = 0; k < kVec; ++k) { + compare_offset[k][i] = dict_offset[hash[i]][k]; + } + } + + // After unrolling, we have kVec writes to dict_offset + #pragma unroll + for (size_t k = 0; k < kVec; ++k) { + dict_offset[hash[k]][k] = (init << k); + } + init++; + + #pragma unroll + for (size_t i = 0; i < kVec; i++) { + #pragma unroll + for (size_t k = 0; k < kVec; ++k) { + count += compare_offset[i][k]; + } + } + } while (++iter < kMaxIter); + return count; +} + +// Declare a 2D array with memory attribute 'doublepump' if +// attr_type=2, attribute 'singlepump' if attr_type=1, +// and no memory attributes otherwise +template +Uint2DArray CreateDictOffset() { + if (attr_type == 1) { + + // The memory attributes apply to the array's declaration + [[intelfpga::singlepump, intelfpga::memory("MLAB"), + 
intelfpga::numbanks(kVec), intelfpga::max_replicates(kVec)]] + Uint2DArray dict_offset; + + return dict_offset; + + } else if (attr_type == 2) { + + [[intelfpga::doublepump, intelfpga::memory("MLAB"), + intelfpga::numbanks(kVec), intelfpga::max_replicates(kVec)]] + Uint2DArray dict_offset; + + return dict_offset; + } + + return Uint2DArray{}; +} + +template +unsigned RunKernel(unsigned init, const unsigned dict_offset_init[]) { + unsigned result = 0; + +#if defined(FPGA_EMULATOR) + intel::fpga_emulator_selector device_selector; +#else + intel::fpga_selector device_selector; +#endif + + try { + queue q(device_selector, dpc_common::exception_handler); + + // Flatten the 2D array to a 1D buffer, because the + // buffer constructor requires a pointer to input data + // that is contiguous in memory. + buffer buffer_d(dict_offset_init, + range<1>(kRows * kVec)); + buffer buffer_r(&result, 1); + + auto e = q.submit([&](handler &h) { + auto accessor_d = buffer_d.get_access(h); + auto accessor_r = buffer_r.get_access(h); + + h.single_task>( + [=]() [[intel::kernel_args_restrict]] { + + // Declare 'dict_offset' to be single or double pumped + Uint2DArray dict_offset = CreateDictOffset(); + + // Initialize 'dict_offset' with values from global memory. + for (size_t i = 0; i < kRows; ++i) { + #pragma unroll + for (size_t k = 0; k < kVec; ++k) { + // After unrolling, we end up with kVec writes to dict_offset. + dict_offset[i][k] = accessor_d[i * kVec + k]; + } + } + accessor_r[0] = Compute(init, dict_offset); + }); + }); + + } catch (sycl::exception const &e) { + // Catches exceptions in the host code + std::cout << "Caught a SYCL host exception:\n" << e.what() << "\n"; + + // Most likely the runtime couldn't find FPGA hardware! 
+ if (e.get_cl_code() == CL_DEVICE_NOT_FOUND) { + std::cout << "If you are targeting an FPGA, please ensure that your " + "system has a correctly configured FPGA board.\n"; + std::cout << "If you are targeting the FPGA emulator, compile with " + "-DFPGA_EMULATOR.\n"; + } + std::terminate(); + } + + return result; +} + +// This host side function performs the same computation as the device side +// kernel, and is used to verify functional correctness. +unsigned GoldenRun(unsigned init, unsigned const dict_offset_init[]) { + Uint2DArray dict_offset; + for (size_t i = 0; i < kRows; ++i) { + for (size_t k = 0; k < kVec; ++k) { + dict_offset[i][k] = dict_offset_init[i * kVec + k]; + } + } + return Compute(init, dict_offset); +} + +int main() { + srand(0); + + Uint2DArray dict_offset_init; + + bool passed = true; + + for (size_t j = 0; j < kNumTests; j++) { + unsigned init = rand() % kMaxVal; + unsigned int dict_offset_init[kRows * kVec]; + + // initialize input data with random values + for (size_t i = 0; i < kRows; ++i) { + for (size_t k = 0; k < kVec; ++k) { + dict_offset_init[i * kVec + k] = rand() % kMaxVal; + } + } + + // compute the golden result + unsigned golden_result = GoldenRun(init, dict_offset_init); + + // run the kernel with 'singlepump' memory attribute + unsigned result_sp = RunKernel<1>(init, dict_offset_init); + + if (!(result_sp == golden_result)) { + passed = false; + std::cout << " Test#" << j + << ": mismatch: " << result_sp << " != " << golden_result + << " (result_sp != golden_result)\n"; + } + + // run the kernel with 'doublepump' memory attribute + unsigned result_dp = RunKernel<2>(init, dict_offset_init); + + if (!(result_dp == golden_result)) { + passed = false; + std::cout << " Test#" << j + << ": mismatch: " << result_dp << " != " << golden_result + << " (result_dp != golden_result)\n"; + } + + // run the kernel with no memory attributes + unsigned result_na = RunKernel<0>(init, dict_offset_init); + + if (!(result_na == golden_result)) { + 
passed = false; + std::cout << " Test#" << j + << ": mismatch: " << result_na << " != " << golden_result + << " (result_na != golden_result)\n"; + } + } + + if (passed) { + std::cout << "PASSED: all kernel results are correct.\n"; + } else { + std::cout << "FAILED\n"; + return 1; + } + + return 0; +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/CMakeLists.txt new file mode 100755 index 0000000000..63f680d7fd --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/CMakeLists.txt @@ -0,0 +1,11 @@ +set(CMAKE_CXX_COMPILER "dpcpp") + +cmake_minimum_required (VERSION 2.8) + +project(Pipes) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +add_subdirectory (src) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/License.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/License.txt new file mode 100755 index 0000000000..e63c6e13dc --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/License.txt @@ -0,0 +1,7 @@ +Copyright Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/README.md b/DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/README.md new file mode 100755 index 0000000000..f168deb09f --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/README.md @@ -0,0 +1,250 @@ +# Data Transfers Using Pipes +This FPGA tutorial shows how to use pipes to transfer data between kernels. + +***Documentation***: The [oneAPI DPC++ FPGA Optimization Guide](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide) provides comprehensive instructions for targeting FPGAs through DPC++. The [oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide) is a general resource for target-independent DPC++ programming. + +| Optimized for | Description +--- |--- +| OS | Linux* Ubuntu* 18.04; Windows* 10 +| Hardware | Intel® Programmable Acceleration Card (PAC) with Intel Arria® 10 GX FPGA;
Intel® Programmable Acceleration Card (PAC) with Intel Stratix® 10 SX FPGA +| Software | Intel® oneAPI DPC++ Compiler (Beta)
Intel® FPGA Add-On for oneAPI Base Toolkit +| What you will learn | The basics of the DPC++ pipes extension for FPGA
How to declare and use pipes in a DPC++ program +| Time to complete | 15 minutes + +_Notice: Limited support in Windows*; compiling for FPGA hardware is not supported in Windows*_ + +## Purpose +This tutorial demonstrates how a kernel in a DPC++ FPGA program transfers +data to or from another kernel using the pipe abstraction. + +### Definition of a Pipe +The primary goal of pipes is to allow concurrent execution of kernels that need +to exchange data. + +A pipe is a FIFO data structure connecting two endpoints that communicate +using the pipe's `read` and `write` operations. An endpoint can be either a kernel +or an external I/O on the FPGA. Therefore, there are three types of pipes: +* kernel-kernel +* kernel-I/O +* I/O-kernel + +This tutorial focuses on kernel-kernel pipes, but +the concepts discussed here apply to other kinds of pipes as well. + +The `read` and `write` operations have two variants: +* Blocking variant: Blocking operations may not return immediately, but are always successful. +* Non-blocking variant: Non-blocking operations take an extra boolean parameter +that is set to `true` if the operation happened successfully. + +Data flows in a single direction inside pipes. In other words, for a pipe `P` +and two kernels using `P`, one of the kernels is exclusively going to perform +`write` to `P` while the other kernel is exclusively going to perform `read` from +`P`. Bidirectional communication can be achieved using two pipes. + +Each pipe has a configurable `capacity` parameter describing the number of `write` +operations that may be performed without any `read` operations being performed. For example, +consider a pipe `P` with capacity 3, and two kernels `K1` and `K2` using +`P`. Assume that `K1` performed the following sequence of operations: + + `write(1)`, `write(2)`, `write(3)` + +In this situation, the pipe is full, because three (the `capacity` of +`P`) `write` operations were performed without any `read` operation. 
In this +situation, a `read` must occur before any other `write` is allowed. + +If a `write` is attempted to a full pipe, one of two behaviors occurs: + + * If the operation is non-blocking, it returns immediately and its + boolean parameter is set to `false`. The `write` does not have any effect. + * If the operation is blocking, it does not return until a `read` is + performed by the other endpoint. Once the `read` is performed, the `write` + takes place. + +The blocking and non-blocking `read` operations have analogous behaviors when +the pipe is empty. + +### Defining a Pipe in DPC++ + +In DPC++, pipes are defined as a class with static members. To declare a pipe that +transfers integer data and has `capacity=4`, use a type alias: + +```c++ +using ProducerToConsumerPipe = pipe< // Defined in the DPC++ headers. + class ProducerConsumerPipe, // An identifier for the pipe. + int, // The type of data in the pipe. + 4>; // The capacity of the pipe. +``` + +The `class ProducerConsumerPipe` template parameter is important to the +uniqueness of the pipe. This class need not be defined, but must be distinct +for each pipe. Consider another type alias with the exact same parameters: + +```c++ +using ProducerToConsumerPipe2 = pipe< // Defined in the DPC++ headers. + class ProducerConsumerPipe, // An identifier for the pipe. + int, // The type of data in the pipe. + 4>; // The capacity of the pipe. +``` + +The uniqueness of a pipe is derived from a combination of all three template +parameters. Since `ProducerToConsumerPipe` and `ProducerToConsumerPipe2` have +the same template parameters, they define the same pipe. + +### Using a Pipe in DPC++ + +This code sample defines a `Consumer` and a `Producer` kernel connected +by the pipe `ProducerToConsumerPipe`. Kernels use the +`ProducerToConsumerPipe::write` and `ProducerToConsumerPipe::read` methods for +communication. 
+ +The `Producer` kernel reads integers from the global memory and writes those integers +into `ProducerToConsumerPipe`, as shown in the following code snippet: + +```c++ +void Producer(queue &q, buffer &input_buffer) { + std::cout << "Enqueuing producer...\n"; + + auto e = q.submit([&](handler &h) { + auto input_accessor = input_buffer.get_access(h); + auto num_elements = input_buffer.get_count(); + + h.single_task([=]() { + for (size_t i = 0; i < num_elements; ++i) { + ProducerToConsumerPipe::write(input_accessor[i]); + } + }); + }); +} +``` + +The `Consumer` kernel reads integers from `ProducerToConsumerPipe`, processes +the integers (`ConsumerWork(i)`), and writes the result into the global memory. + +```c++ +void Consumer(queue &q, buffer &output_buffer) { + std::cout << "Enqueuing consumer...\n"; + + auto e = q.submit([&](handler &h) { + auto output_accessor = output_buffer.get_access(h); + size_t num_elements = output_buffer.get_count(); + + h.single_task([=]() { + for (size_t i = 0; i < num_elements; ++i) { + int input = ProducerToConsumerPipe::read(); + int answer = ConsumerWork(input); + output_accessor[i] = answer; + } + }); + }); +} +``` + +**NOTE:** The `read` and `write` operations used are blocking. If +`ConsumerWork` is an expensive operation, then `Producer` might fill +`ProducerToConsumerPipe` faster than `Consumer` can read from it, causing +`Producer` to block occasionally. + +## Key Concepts +* The basics of the DPC++ pipes extension for FPGA +* How to declare and use pipes in a DPC++ program + +## License +This code sample is licensed under MIT license. + +## Building the `pipes` Tutorial + +### Include Files +The included header `dpc_common.hpp` is located at `%ONEAPI_ROOT%\dev-utilities\latest\include` on your development system. + +### Running Samples in DevCloud +If running a sample in the Intel DevCloud, remember that you must specify the compute node (FPGA) as well as whether to run in batch or interactive mode. 
For more information see the Intel® oneAPI Base Toolkit Get Started Guide ([https://devcloud.intel.com/oneapi/get-started/base-toolkit/](https://devcloud.intel.com/oneapi/get-started/base-toolkit/)). + +When compiling for FPGA hardware, it is recommended to increase the job timeout to 12h. + +### On a Linux* System + +1. Generate the `Makefile` by running `cmake`. + ``` + mkdir build + cd build + ``` + To compile for the Intel® PAC with Intel Arria® 10 GX FPGA, run `cmake` using the command: + ``` + cmake .. + ``` + Alternatively, to compile for the Intel® PAC with Intel Stratix® 10 SX FPGA, run `cmake` using the command: + + ``` + cmake .. -DFPGA_BOARD=intel_s10sx_pac:pac_s10 + ``` + +2. Compile the design through the generated `Makefile`. The following build targets are provided, matching the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + make fpga_emu + ``` + * Generate the optimization report: + ``` + make report + ``` + * Compile for FPGA hardware (longer compile time, targets FPGA device): + ``` + make fpga + ``` +3. (Optional) As the above hardware compile may take several hours to complete, an Intel® PAC with Intel Arria® 10 GX FPGA precompiled binary can be downloaded here. + +### On a Windows* System +Note: `cmake` is not yet supported on Windows. A build.ninja file is provided instead. + +1. Enter the source file directory. + ``` + cd src + ``` + +2. Compile the design. The following build targets are provided, matching the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + ninja fpga_emu + ``` + + * Generate the optimization report: + + ``` + ninja report + ``` + If you are targeting Intel® PAC with Intel Stratix® 10 SX FPGA, instead use: + ``` + ninja report_s10_pac + ``` + * Compiling for FPGA hardware is not yet supported on Windows. 
+ + ### In Third-Party Integrated Development Environments (IDEs) + +You can compile and run this tutorial in the Eclipse* IDE (in Linux*) and the Visual Studio* IDE (in Windows*). For instructions, refer to the following link: [Intel® oneAPI DPC++ FPGA Workflows on Third-Party IDEs](https://software.intel.com/en-us/articles/intel-oneapi-dpcpp-fpga-workflow-on-ide) + +## Examining the Reports +Locate `report.html` in the `pipes_report.prj/reports/` or `pipes_s10_pac_report.prj/reports/` directory. Open the report in any of Chrome*, Firefox*, Edge*, or Internet Explorer*. + +Navigate to the "System Viewer" to visualize the structure of the kernel system. Identify the pipe connecting the two kernels. + +## Running the Sample + + 1. Run the sample on the FPGA emulator (the kernel executes on the CPU): + ``` + ./pipes.fpga_emu (Linux) + pipes.fpga_emu.exe (Windows) + ``` +2. Run the sample on the FPGA device: + ``` + ./pipes.fpga (Linux) + ``` + +### Example of Output +``` +Input Array Size: 1024 +Enqueuing producer... +Enqueuing consumer... 
+PASSED: The results are correct +``` diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/pipes.sln b/DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/pipes.sln new file mode 100755 index 0000000000..aa652a2f4b --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/pipes.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.28307.705 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pipes", "pipes.vcxproj", "{BE9E5E70-F644-4119-9A1F-E2B75C85B9E2}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {BE9E5E70-F644-4119-9A1F-E2B75C85B9E2}.Debug|x64.ActiveCfg = Debug|x64 + {BE9E5E70-F644-4119-9A1F-E2B75C85B9E2}.Debug|x64.Build.0 = Debug|x64 + {BE9E5E70-F644-4119-9A1F-E2B75C85B9E2}.Release|x64.ActiveCfg = Release|x64 + {BE9E5E70-F644-4119-9A1F-E2B75C85B9E2}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {47B77939-C7AE-44EC-AD38-EF8459A9C41A} + EndGlobalSection +EndGlobal diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/pipes.vcxproj b/DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/pipes.vcxproj new file mode 100755 index 0000000000..7bae18102e --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/pipes.vcxproj @@ -0,0 +1,160 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + + + + + + 15.0 + {be9e5e70-f644-4119-9a1f-e2b75c85b9e2} + Win32Proj + pipes + $(WindowsSDKVersion.Replace("\","")) + + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + 
+ Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + + + + + + + + + + + + + + + + + + + true + + + true + + + false + + + false + + + + Use + Level3 + Disabled + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + Disabled + true + true + pch.h + true + -DFPGA_EMULATOR %(AdditionalOptions) + $(IntDir)pipes.obj + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + true + -DFPGA_EMULATOR %(AdditionalOptions) + $(IntDir)pipes.obj + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + + diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/sample.json b/DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/sample.json new file mode 100755 index 0000000000..1c67d49d41 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/sample.json @@ -0,0 +1,51 @@ +{ + "guid": "58CF1ABA-5D08-40B7-ACC2-5CB904261413", + "name": "Data Transfers Using Pipes", + "categories": ["Toolkit/Intel® oneAPI Base Toolkit/FPGA/Tutorials"], + "description": "How to use pipes to transfer data between kernels on an FPGA", + "toolchain": ["dpcpp"], + "os": ["linux", "windows"], + "targetDevice": ["FPGA"], + "builder": ["ide", "cmake"], + "languages": [{"cpp":{}}], + "ciTests": { + "linux": [ + { + "id": "fpga_emu", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make fpga_emu", + "./pipes.fpga_emu" + ] + }, + { + "id": "report", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make report" + ] + } + ], + "windows": [ + { + "id": "fpga_emu", + "steps": [ + "cd src", + "ninja fpga_emu", + "pipes.fpga_emu.exe" + ] + }, + { + "id": 
"report", + "steps": [ + "cd src", + "ninja report" + ] + } + ] + } +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/src/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/src/CMakeLists.txt new file mode 100755 index 0000000000..f8a80a7e68 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/src/CMakeLists.txt @@ -0,0 +1,89 @@ +set(SOURCE_FILE pipes.cpp) +set(TARGET_NAME pipes) +set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu) +set(FPGA_TARGET ${TARGET_NAME}.fpga) + +# Intel supported FPGA Boards and their names +set(A10_PAC_BOARD_NAME "intel_a10gx_pac:pac_a10") +set(S10_PAC_BOARD_NAME "intel_s10sx_pac:pac_s10") + +# Assume target is the Intel(R) PAC with Intel Arria(R) 10 GX FPGA +SET(_FPGA_BOARD ${A10_PAC_BOARD_NAME}) + +# Check if target is the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA +IF (NOT DEFINED FPGA_BOARD) + MESSAGE(STATUS "\tFPGA_BOARD was not specified. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. Please refer to the README for more information on how to run the design on the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${A10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${S10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Stratix(R) 10 SX FPGA.") + SET(_FPGA_BOARD ${S10_PAC_BOARD_NAME}) + +ELSE() + MESSAGE(STATUS "\tAn invalid board name was passed in using the FPGA_BOARD flag. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. 
Please refer to the README for the list of valid board names.") +ENDIF() + +set(HARDWARE_COMPILE_FLAGS "-fintelfpga") + +# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation +set(HARDWARE_LINK_FLAGS "-fintelfpga -Xshardware -Xsboard=${_FPGA_BOARD} ${USER_HARDWARE_FLAGS}") + +set(EMULATOR_COMPILE_FLAGS "-fintelfpga -DFPGA_EMULATOR") +set(EMULATOR_LINK_FLAGS "-fintelfpga") + +# fpga emulator +if(WIN32) + set(WIN_EMULATOR_TARGET ${EMULATOR_TARGET}.exe) + add_custom_target(fpga_emu DEPENDS ${WIN_EMULATOR_TARGET}) + separate_arguments(WIN_EMULATOR_COMPILE_FLAGS WINDOWS_COMMAND "${EMULATOR_COMPILE_FLAGS}") + add_custom_command(OUTPUT ${WIN_EMULATOR_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} ${WIN_EMULATOR_COMPILE_FLAGS} /GX ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${WIN_EMULATOR_TARGET} + DEPENDS ${SOURCE_FILE}) + +else() + add_executable(${EMULATOR_TARGET} ${SOURCE_FILE}) + add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS ${EMULATOR_COMPILE_FLAGS}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS ${EMULATOR_LINK_FLAGS}) +endif() + +# fpga +if(WIN32) + add_custom_target(fpga + COMMAND echo "FPGA hardware flow is not supported in Windows") +else() + add_executable(${FPGA_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILE}) + add_custom_target(fpga DEPENDS ${FPGA_TARGET}) + set_target_properties(${FPGA_TARGET} PROPERTIES COMPILE_FLAGS ${HARDWARE_COMPILE_FLAGS}) + set_target_properties(${FPGA_TARGET} PROPERTIES LINK_FLAGS ${HARDWARE_LINK_FLAGS}) +endif() + +# report + +if(WIN32) + set(DEVICE_OBJ_FILE ${TARGET_NAME}_report.a) + add_custom_target(report DEPENDS ${DEVICE_OBJ_FILE}) + + separate_arguments(HARDWARE_LINK_FLAGS_LIST WINDOWS_COMMAND "${HARDWARE_LINK_FLAGS}") + add_custom_command(OUTPUT ${DEVICE_OBJ_FILE} + COMMAND ${CMAKE_CXX_COMPILER} /EHsc ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link 
${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${DEVICE_OBJ_FILE} + DEPENDS ${SOURCE_FILE}) + +else() + set(DEVICE_OBJ_FILE ${TARGET_NAME}_report.a) + add_custom_target(report DEPENDS ${DEVICE_OBJ_FILE}) + + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} ${SOURCE_FILE} COPYONLY) + + separate_arguments(HARDWARE_LINK_FLAGS_LIST UNIX_COMMAND "${HARDWARE_LINK_FLAGS}") + add_custom_command(OUTPUT ${DEVICE_OBJ_FILE} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link ${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${DEVICE_OBJ_FILE} + DEPENDS ${SOURCE_FILE}) +endif() + +# run +add_custom_target(run + COMMAND ../${TARGET_NAME}.fpga_emu + DEPENDS ${TARGET_NAME}.fpga_emu) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/src/build.ninja b/DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/src/build.ninja new file mode 100755 index 0000000000..a45c4c511c --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/src/build.ninja @@ -0,0 +1,30 @@ +source_file = pipes.cpp +target_name = pipes + +emulator_target = ${target_name}.fpga_emu.exe +report_target = ${target_name}_report.a +report_target_s10_pac = ${target_name}_s10_pac_report.a + +hardware_flags = -fintelfpga -Xshardware -std=c++14 +emulator_flags = -fintelfpga -DFPGA_EMULATOR -std=c++14 + +rule build_fpga_emu + command = dpcpp /GX ${emulator_flags} $in -o $out + +rule gen_report + command = dpcpp /GX ${hardware_flags} -Xsboard=intel_a10gx_pac:pac_a10 -fsycl-link $in -o $out + +rule gen_report_s10_pac + command = dpcpp /GX ${hardware_flags} -Xsboard=intel_s10sx_pac:pac_s10 -fsycl-link $in -o $out + +# FPGA emulator +build fpga_emu: phony ${emulator_target} +build ${emulator_target}: build_fpga_emu ${source_file} + +# report +build report: phony ${report_target} +build ${report_target}: gen_report ${source_file} + +# report (S10 PAC) +build report_s10_pac: phony ${report_target_s10_pac} +build ${report_target_s10_pac}: 
gen_report_s10_pac ${source_file} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/src/pipes.cpp b/DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/src/pipes.cpp new file mode 100755 index 0000000000..71de729c3c --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/pipes/src/pipes.cpp @@ -0,0 +1,135 @@ +//============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +#include +#include +#include +#include +#include +#include "dpc_common.hpp" + + +using namespace sycl; + +using ProducerToConsumerPipe = intel::pipe< // Defined in the SYCL headers. + class ProducerConsumerPipe, // An identifier for the pipe. + int, // The type of data in the pipe. + 4>; // The capacity of the pipe. + +// Forward declare the kernel names +// (This will become unnecessary in a future compiler version.) +class ProducerTutorial; +class ConsumerTutorial; + +// The Producer kernel reads data from a SYCL buffer and writes it to +// a pipe. This transfers the input data from the host to the Consumer kernel +// that is running concurrently. 
+void Producer(queue &q, buffer &input_buffer) { + std::cout << "Enqueuing producer...\n"; + + auto e = q.submit([&](handler &h) { + auto input_accessor = input_buffer.get_access(h); + size_t num_elements = input_buffer.get_count(); + + h.single_task([=]() { + for (size_t i = 0; i < num_elements; ++i) { + ProducerToConsumerPipe::write(input_accessor[i]); + } + }); + }); +} + + +// An example of some simple work, to be done by the Consumer kernel +// on the input data: it returns the square of its argument +int ConsumerWork(int i) { return i * i; } + +// The Consumer kernel reads data from the pipe, performs some work +// on the data (ConsumerWork), and writes one result per element to an output buffer +void Consumer(queue &q, buffer &out_buf) { + std::cout << "Enqueuing consumer...\n"; + + auto e = q.submit([&](handler &h) { + auto out_accessor = out_buf.get_access(h); + size_t num_elements = out_buf.get_count(); + + h.single_task([=]() { + for (size_t i = 0; i < num_elements; ++i) { + int input = ProducerToConsumerPipe::read(); + int answer = ConsumerWork(input); + out_accessor[i] = answer; + } + }); + }); +} + +int main(int argc, char *argv[]) { + size_t array_size = (1 << 10); + + if (argc > 1) { + std::string option(argv[1]); + if (option == "-h" || option == "--help") { + std::cout << "Usage: \n \n\nFAILED\n"; + return 1; + } else { + array_size = std::stoi(option); // NOTE(review): the argument is not validated; std::stoi throws on non-numeric text and a negative value would wrap when stored in size_t + } + } + + std::cout << "Input Array Size: " << array_size << "\n"; + + std::vector producer_input(array_size, -1); + std::vector consumer_output(array_size, -1); + + // Initialize the input data with the sequence 0, 1, 2, ... + for (size_t i = 0; i < array_size; i++) + producer_input[i] = i; + +#if defined(FPGA_EMULATOR) + intel::fpga_emulator_selector device_selector; +#else + intel::fpga_selector device_selector; +#endif + + try { + queue q(device_selector, dpc_common::exception_handler); + + buffer producer_buffer(producer_input); + // Use verbose SYCL 1.2 syntax for the output buffer. + // (This will become unnecessary in a future compiler version.) 
+ buffer consumer_buffer(consumer_output.data(), array_size); + + // Run the two kernels concurrently. The Producer kernel sends + // data via a pipe to the Consumer kernel. + Producer(q, producer_buffer); + Consumer(q, consumer_buffer); + + } catch (sycl::exception const &e) { + // Catches exceptions in the host code + std::cout << "Caught a SYCL host exception:\n" << e.what() << "\n"; + + // Most likely the runtime couldn't find FPGA hardware! + if (e.get_cl_code() == CL_DEVICE_NOT_FOUND) { + std::cout << "If you are targeting an FPGA, please ensure that your " + "system has a correctly configured FPGA board.\n"; + std::cout << "If you are targeting the FPGA emulator, compile with " + "-DFPGA_EMULATOR.\n"; + } + std::terminate(); + } + + // Verify result: each output must equal ConsumerWork applied to the matching input. NOTE(review): the buffers are destroyed at the end of the try scope above; presumably that is what makes consumer_output safe to read here - confirm + for (size_t i = 0; i < array_size; i++) { + if (consumer_output[i] != ConsumerWork(producer_input[i])) { + std::cout << "input = " << producer_input[i] + << " expected: " << ConsumerWork(producer_input[i]) + << " got: " << consumer_output[i] << "\n"; + std::cout << "FAILED: The results are incorrect\n"; + return 1; + } + } + std::cout << "PASSED: The results are correct\n"; + return 0; +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/CMakeLists.txt new file mode 100755 index 0000000000..c18e7e73ed --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/CMakeLists.txt @@ -0,0 +1,11 @@ +set(CMAKE_CXX_COMPILER "dpcpp") + +cmake_minimum_required (VERSION 2.8) + +project(SpeculatedIterations) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +add_subdirectory (src) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/License.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/License.txt 
new file mode 100755 index 0000000000..e63c6e13dc --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/License.txt @@ -0,0 +1,7 @@ +Copyright Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/README.md b/DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/README.md new file mode 100755 index 0000000000..bd1d9359bf --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/README.md @@ -0,0 +1,174 @@ + +# Speculated Iterations of a Loop +This FPGA tutorial demonstrates applying the `speculated_iterations` attribute to a loop in a task kernel to enable more efficient loop pipelining. + +***Documentation***: The [oneAPI DPC++ FPGA Optimization Guide](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide) provides comprehensive instructions for targeting FPGAs through DPC++. 
The [oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide) is a general resource for target-independent DPC++ programming. + +| Optimized for | Description +--- |--- +| OS | Linux* Ubuntu* 18.04; Windows* 10 +| Hardware | Intel® Programmable Acceleration Card (PAC) with Intel Arria® 10 GX FPGA;
Intel® Programmable Acceleration Card (PAC) with Intel Stratix® 10 SX FPGA +| Software | Intel® oneAPI DPC++ Compiler (Beta)
Intel® FPGA Add-On for oneAPI Base Toolkit +| What you will learn | What the `speculated_iterations` attribute does
How to apply the `speculated_iterations` attribute to loops in your program
How to determine the optimal number of speculated iterations +| Time to complete | 15 minutes + +_Notice: Limited support in Windows*; compiling for FPGA hardware is not supported in Windows*_ + +## Purpose +Loop speculation is an advanced loop pipelining optimization technique. It enables loop iterations to be initiated before determining whether they should have been initiated. "Speculated iterations" are those iterations that launch before the exit condition computation has completed. This is beneficial when the computation of the exit condition is preventing effective loop pipelining. + +The `speculated_iterations` attribute is a loop attribute that enables you to directly control the number of speculated iterations for a loop. The attribute `[[intelfpga::speculated_iterations(N)]]` takes an integer argument `N` to specify the permissible number of iterations to speculate. + +### Simple example +``` + [[intelfpga::speculated_iterations(1)]] + while (sycl::log10(x) < N) { + x += 1; + } + dst[0] = x; +``` +The loop in this example will have one speculated iteration. +### Operations with side effects +When launching speculated iterations, operations with side-effects (such as stores to memory) must be predicated by the exit condition to ensure functional correctness. For this reason, operations with side-effects must be delayed until after the exit condition has been computed. + +### Optimizing the number of speculated iterations +Loop speculation is beneficial when the computation of the loop exit condition is the bottleneck preventing the compiler from achieving a smaller initiation interval (II). In such instances, increasing the number of speculated iterations often improves the II. Note that this may also uncover additional bottlenecks preventing the further optimization of the loop. + +However, adding speculated iterations is not without cost. They introduce overhead in nested loops, reducing overall loop occupancy. 
Consider the code snippet below: +```c++ +for (size_t i = 0; i < kMany; ++i) { + // The compiler may automatically infer speculated iterations + for (size_t j = 0; complex_exit_condition(j); ++j) { + output[i,j] = some_function(input[i,j]); + } +} +``` +The *i+1*th invocation of the inner loop cannot begin until all real and speculated iterations of its *i*th invocation have completed. This overhead is negligible if the number of speculated iterations is much less than the number of real iterations. However, when the inner loop's trip count is small on average, the overhead becomes non-negligible and the speculated iterations can become detrimental to throughput. In such circumstances, the `speculated_iterations` attribute can be used to *reduce* the number of speculated iterations chosen by the compiler's heuristics. + +In both increasing and decreasing cases, some experimentation is usually necessary. Choosing too few speculated iterations can increase the II because multiple cycles are required to evaluate the exit condition. Choosing too many speculated iterations creates unneeded "dead space" between sequential invocations of an inner loop. + +### Tutorial example +In the tutorial design's kernel, the exit condition of the loop involves a logarithm and a compare operation. This complex exit condition prevents the loop from achieving ```II=1```. + +The design enqueues variants of the kernel with 0, 10 and 27 speculated iterations respectively to demonstrate the effect of the `speculated_iterations` attribute on the Intel® PAC with Intel Arria® 10 GX FPGA. Different numbers are chosen for the Intel® PAC with Intel Stratix® 10 SX FPGA accordingly. + +## Key Concepts +* Description of the `speculated_iterations` attribute. +* How to apply the `speculated_iterations` attribute to loops in your program. +* Optimizing the number of speculated iterations. + +## License +This code sample is licensed under MIT license. 
+ + +## Building the `speculated_iterations` Tutorial + +### Include Files +The included header `dpc_common.hpp` is located at `%ONEAPI_ROOT%\dev-utilities\latest\include` on your development system. + +### Running Samples in DevCloud +If running a sample in the Intel DevCloud, remember that you must specify the compute node (fpga_compile or fpga_runtime) as well as whether to run in batch or interactive mode. For more information see the Intel® oneAPI Base Toolkit Get Started Guide ([https://devcloud.intel.com/oneapi/get-started/base-toolkit/](https://devcloud.intel.com/oneapi/get-started/base-toolkit/)). + +When compiling for FPGA hardware, it is recommended to increase the job timeout to 12h. + +### On a Linux* System + +1. Generate the `Makefile` by running `cmake`. + ``` + mkdir build + cd build + ``` + To compile for the Intel® PAC with Intel Arria® 10 GX FPGA, run `cmake` using the command: + ``` + cmake .. + ``` + Alternatively, to compile for the Intel® PAC with Intel Stratix® 10 SX FPGA, run `cmake` using the command: + + ``` + cmake .. -DFPGA_BOARD=intel_s10sx_pac:pac_s10 + ``` + +2. Compile the design through the generated `Makefile`. The following build targets are provided, matching the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + make fpga_emu + ``` + * Generate the optimization report: + ``` + make report + ``` + * Compile for FPGA hardware (longer compile time, targets FPGA device): + ``` + make fpga + ``` +3. (Optional) As the above hardware compile may take several hours to complete, an Intel® PAC with Intel Arria® 10 GX FPGA precompiled binary can be downloaded here. + +### On a Windows* System +Note: `cmake` is not yet supported on Windows. A build.ninja file is provided instead. + +1. Enter the source file directory. + ``` + cd src + ``` + +2. Compile the design. 
The following build targets are provided, matching the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + ninja fpga_emu + ``` + + * Generate the optimization report: + + ``` + ninja report + ``` + If you are targeting Intel® PAC with Intel Stratix® 10 SX FPGA, instead use: + ``` + ninja report_s10_pac + ``` + * Compiling for FPGA hardware is not yet supported on Windows. + + ### In Third-Party Integrated Development Environments (IDEs) + +You can compile and run this tutorial in the Eclipse* IDE (in Linux*) and the Visual Studio* IDE (in Windows*). For instructions, refer to the following link: [Intel® oneAPI DPC++ FPGA Workflows on Third-Party IDEs](https://software.intel.com/en-us/articles/intel-oneapi-dpcpp-fpga-workflow-on-ide) + +## Examining the Reports +Locate `report.html` in the `speculated_iterations_report.prj/reports/` or `speculated_iterations_s10_pac_report.prj/reports/` directory. Open the report in any of Chrome*, Firefox*, Edge*, or Internet Explorer*. + +In the "Loop Analysis" section of the report, check the II of the loop in each version of the kernel. Using the kernel with 0 speculated iterations as a baseline, check its loop II as a hint for the ideal number of speculated iterations. The information shown below is from compiling on the Intel® PAC with Intel Arria® 10 GX FPGA. + +* When the number of `speculated iterations` is set to 0, the loop II is 27. +* Setting the `speculated iterations` to 27 yielded an II of 1. +* Setting the `speculated iterations` to an intermediate value of 10 results in an II of 3. + + +These results make sense when you recall that the loop exit computation has a latency of 27 cycles (suggested by looking at the loop II with 0 speculation). With no speculation, a new iteration can only be launched every 27 cycles. Increasing the speculation to 27 enables a new iteration to launch every cycle. 
Reducing the speculation to 10 results in an II of 3 because 10 speculated iterations multiplied by 3 cycles between iterations leaves 30 cycles in which to compute the exit condition, sufficient to cover the 27-cycle exit condition. + +## Running the Sample + + 1. Run the sample on the FPGA emulator (the kernel executes on the CPU): + ``` + ./speculated_iterations.fpga_emu (Linux) + speculated_iterations.fpga_emu.exe (Windows) + ``` +2. Run the sample on the FPGA device: + ``` + ./speculated_iterations.fpga (Linux) + ``` + +### Example of Output +``` +Speculated Iterations: 0 -- kernel time: 8564.98 ms +Performance for kernel with 0 speculated iterations: 11675 MFLOPs +Speculated Iterations: 10 -- kernel time: 952 ms +Performance for kernel with 10 speculated iterations: 105076 MFLOPs +Speculated Iterations: 27 -- kernel time: 317 ms +Performance for kernel with 27 speculated iterations: 315181 MFLOPs +PASSED: The results are correct +``` +The execution time and throughput for each kernel are displayed. + +Note that this performance difference will be apparent only when running on FPGA hardware. The emulator, while useful for verifying functionality, will generally not reflect differences in performance. 
+ diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/sample.json b/DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/sample.json new file mode 100755 index 0000000000..28f98e4a48 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/sample.json @@ -0,0 +1,51 @@ +{ + "guid": "66A57127-1F8D-4769-8CCB-16ECC56A446F", + "name": "Speculated Iterations of a Loop", + "categories": ["Toolkit/Intel® oneAPI Base Toolkit/FPGA/Tutorials"], + "description": "FPGA tutorial demonstrating the speculated_iterations attribute", + "toolchain": ["dpcpp"], + "os": ["linux", "windows"], + "targetDevice": ["FPGA"], + "builder": ["ide", "cmake"], + "languages": [{"cpp":{}}], + "ciTests": { + "linux": [ + { + "id": "fpga_emu", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make fpga_emu", + "./speculated_iterations.fpga_emu" + ] + }, + { + "id": "report", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make report" + ] + } + ], + "windows": [ + { + "id": "fpga_emu", + "steps": [ + "cd src", + "ninja fpga_emu", + "speculated_iterations.fpga_emu.exe" + ] + }, + { + "id": "report", + "steps": [ + "cd src", + "ninja report" + ] + } + ] + } +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/speculated_iterations.sln b/DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/speculated_iterations.sln new file mode 100755 index 0000000000..7155665db9 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/speculated_iterations.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.28307.705 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "speculated_iterations", "speculated_iterations.vcxproj", "{CF6A576B-665D-4F24-BB62-0DAE7A7B3C64}" +EndProject +Global + 
GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {CF6A576B-665D-4F24-BB62-0DAE7A7B3C64}.Debug|x64.ActiveCfg = Debug|x64 + {CF6A576B-665D-4F24-BB62-0DAE7A7B3C64}.Debug|x64.Build.0 = Debug|x64 + {CF6A576B-665D-4F24-BB62-0DAE7A7B3C64}.Release|x64.ActiveCfg = Release|x64 + {CF6A576B-665D-4F24-BB62-0DAE7A7B3C64}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {92BEFAAB-0365-4E5A-9C4A-E50AB49B2A6B} + EndGlobalSection +EndGlobal diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/speculated_iterations.vcxproj b/DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/speculated_iterations.vcxproj new file mode 100755 index 0000000000..2ec6e32238 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/speculated_iterations.vcxproj @@ -0,0 +1,161 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + + + + + + 15.0 + {cf6a576b-665d-4f24-bb62-0dae7a7b3c64} + Win32Proj + speculated_iterations + $(WindowsSDKVersion.Replace("\","")) + + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + + + + + + + + + + + + + + + + + + + true + + + true + + + false + + + false + + + + Use + Level3 + Disabled + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + Disabled + true + true + pch.h + true + -DFPGA_EMULATOR %(AdditionalOptions) + $(IntDir)speculated_iterations.obj + 
$(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + true + -DFPGA_EMULATOR %(AdditionalOptions) + $(IntDir)speculated_iterations.obj + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + + \ No newline at end of file diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/src/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/src/CMakeLists.txt new file mode 100755 index 0000000000..5140f431a0 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/src/CMakeLists.txt @@ -0,0 +1,97 @@ +set(SOURCE_FILE speculated_iterations.cpp) +set(TARGET_NAME speculated_iterations) + +set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu) +set(FPGA_TARGET ${TARGET_NAME}.fpga) + +# Intel supported FPGA Boards and their names +set(A10_PAC_BOARD_NAME "intel_a10gx_pac:pac_a10") +set(S10_PAC_BOARD_NAME "intel_s10sx_pac:pac_s10") + +# Assume target is the Intel(R) PAC with Intel Arria(R) 10 GX FPGA +SET(_FPGA_BOARD ${A10_PAC_BOARD_NAME}) + +# Check if target is the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA +IF (NOT DEFINED FPGA_BOARD) + MESSAGE(STATUS "\tFPGA_BOARD was not specified. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. 
Please refer to the README for more information on how to run the design on the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${A10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${S10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Stratix(R) 10 SX FPGA.") + SET(_FPGA_BOARD ${S10_PAC_BOARD_NAME}) + +ELSE() + MESSAGE(STATUS "\tAn invalid board name was passed in using the FPGA_BOARD flag. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. Please refer to the README for the list of valid board names.") +ENDIF() + +# This tutorial needs to know which FPGA we are targetting to decide how many speculated_iterations to use +IF (_FPGA_BOARD STREQUAL ${A10_PAC_BOARD_NAME}) + SET(FPGA_BOARD_MACRO "-DA10") +ELSEIF(_FPGA_BOARD STREQUAL ${S10_PAC_BOARD_NAME}) + SET(FPGA_BOARD_MACRO "-DS10") +ELSE() + MESSAGE(FATAL_ERROR "Unknown board!") +ENDIF() + +set(HARDWARE_COMPILE_FLAGS "-fintelfpga ${FPGA_BOARD_MACRO}") + +# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation +set(HARDWARE_LINK_FLAGS "-fintelfpga -Xshardware -Xsboard=${_FPGA_BOARD} ${FPGA_BOARD_MACRO} ${USER_HARDWARE_FLAGS}") + +set(EMULATOR_COMPILE_FLAGS "-fintelfpga -DFPGA_EMULATOR ${FPGA_BOARD_MACRO}") +set(EMULATOR_LINK_FLAGS "-fintelfpga") + +# fpga emulator +if(WIN32) + set(WIN_EMULATOR_TARGET ${EMULATOR_TARGET}.exe) + add_custom_target(fpga_emu DEPENDS ${WIN_EMULATOR_TARGET}) + separate_arguments(WIN_EMULATOR_COMPILE_FLAGS WINDOWS_COMMAND "${EMULATOR_COMPILE_FLAGS}") + add_custom_command(OUTPUT ${WIN_EMULATOR_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} ${WIN_EMULATOR_COMPILE_FLAGS} /GX ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${WIN_EMULATOR_TARGET} + 
DEPENDS ${SOURCE_FILE}) +else() + add_executable(${EMULATOR_TARGET} ${SOURCE_FILE}) + add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS ${EMULATOR_COMPILE_FLAGS}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS ${EMULATOR_LINK_FLAGS}) +endif() + +# fpga +if(WIN32) + add_custom_target(fpga + COMMAND echo "FPGA hardware flow is not supported in Windows") +else() + add_executable(${FPGA_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILE}) + add_custom_target(fpga DEPENDS ${FPGA_TARGET}) + set_target_properties(${FPGA_TARGET} PROPERTIES COMPILE_FLAGS ${HARDWARE_COMPILE_FLAGS}) + set_target_properties(${FPGA_TARGET} PROPERTIES LINK_FLAGS ${HARDWARE_LINK_FLAGS}) +endif() + +# generate report +if(WIN32) + set(DEVICE_OBJ_FILE ${TARGET_NAME}_report.a) + add_custom_target(report DEPENDS ${DEVICE_OBJ_FILE}) + + separate_arguments(HARDWARE_LINK_FLAGS_LIST WINDOWS_COMMAND "${HARDWARE_LINK_FLAGS}") + add_custom_command(OUTPUT ${DEVICE_OBJ_FILE} + COMMAND ${CMAKE_CXX_COMPILER} /EHsc ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${DEVICE_OBJ_FILE} + DEPENDS ${SOURCE_FILE}) + +else() + set(DEVICE_OBJ_FILE ${TARGET_NAME}_report.a) + add_custom_target(report DEPENDS ${DEVICE_OBJ_FILE}) + + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} ${SOURCE_FILE} COPYONLY) + + separate_arguments(HARDWARE_LINK_FLAGS_LIST UNIX_COMMAND "${HARDWARE_LINK_FLAGS}") + add_custom_command(OUTPUT ${DEVICE_OBJ_FILE} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link ${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${DEVICE_OBJ_FILE} + DEPENDS ${SOURCE_FILE}) +endif() + +# run +add_custom_target(run + COMMAND ../${TARGET_NAME}.fpga_emu + DEPENDS ${TARGET_NAME}.fpga_emu) \ No newline at end of file diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/src/build.ninja 
b/DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/src/build.ninja new file mode 100755 index 0000000000..e8c5f7f77e --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/src/build.ninja @@ -0,0 +1,32 @@ +source_file = speculated_iterations.cpp +target_name = speculated_iterations + +emulator_target = ${target_name}.fpga_emu.exe +report_target = ${target_name}_report.a +report_target_s10_pac = ${target_name}_s10_pac_report.a + +hardware_flags = -fintelfpga -Xshardware +emulator_flags = -fintelfpga -DFPGA_EMULATOR + +rule build_fpga_emu + # For the emulator, it makes no difference whether this sample is compiled with -DA10 or -DS10 + command = dpcpp /GX ${emulator_flags} -DA10 $in -o $out + +rule gen_report + command = dpcpp /GX ${hardware_flags} -DA10 -Xsboard=intel_a10gx_pac:pac_a10 -fsycl-link $in -o $out + +rule gen_report_s10_pac + command = dpcpp /GX ${hardware_flags} -DS10 -Xsboard=intel_s10sx_pac:pac_s10 -fsycl-link $in -o $out + +# FPGA emulator +build fpga_emu: phony ${emulator_target} +build ${emulator_target}: build_fpga_emu ${source_file} + +# report +build report: phony ${report_target} +build ${report_target}: gen_report ${source_file} + +# report (S10 PAC) +build report_s10_pac: phony ${report_target_s10_pac} +build ${report_target_s10_pac}: gen_report_s10_pac ${source_file} + diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/src/speculated_iterations.cpp b/DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/src/speculated_iterations.cpp new file mode 100755 index 0000000000..f689a6eb03 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/src/speculated_iterations.cpp @@ -0,0 +1,150 @@ +//============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +#include +#include +#include 
+#include +#include +#include +#include "dpc_common.hpp" + +// Use smaller values if run on the emulator to keep the CPU runtime reasonable +// Use the largest possible int values on the FPGA to show the difference in +// performance with and without speculated_iterations +#if defined(FPGA_EMULATOR) +constexpr float kUpper = 3.0f; +constexpr size_t kExpectedIterations = 1e3; +#else +constexpr float kUpper = 8.0f; +constexpr size_t kExpectedIterations = 1e8; +#endif + +using namespace sycl; + +// This is the class used to name the kernel for the runtime. +// This must be done when the kernel is expressed as a lambda. +template class KernelCompute; + +template +void ComplexExit(const device_selector &selector, float bound, int &res) { + double kernel_time_ms = 0.0; + try { + // create the device queue with profiling enabled + auto prop_list = property_list{property::queue::enable_profiling()}; + queue q(selector, dpc_common::exception_handler, prop_list); + + // The scalar inputs are passed to the kernel using the lambda capture, + // but a SYCL buffer must be used to return a scalar from the kernel. + buffer buffer_res(&res, 1); + + event e = q.submit([&](handler &h) { + auto accessor_res = buffer_res.get_access(h); + + h.single_task>([=]() { + int x = 1; + + // Computing the exit condition of this loop is a complex operation. + // Since the value of var is not known at compile time, the loop + // trip count is variable and the exit condition must be evaluated at + // each iteration. 
+ [[intelfpga::speculated_iterations(spec_iter)]] + while (sycl::log10((float)(x)) < bound) { + x++; + } + + accessor_res[0] = x; + }); + }); + + // get the kernel time in milliseconds + // this excludes memory transfer and queuing overhead + double startk = + e.template get_profiling_info(); + double endk = + e.template get_profiling_info(); + kernel_time_ms = (endk - startk) * 1e-6; + + } catch (exception const &exc) { + std::cout << "Caught synchronous SYCL exception:\n" << exc.what() << "\n"; + if (exc.get_cl_code() == CL_DEVICE_NOT_FOUND) { + std::cout << "If you are targeting an FPGA, please ensure that your " + "system has a correctly configured FPGA board.\n"; + std::cout << "If you are targeting the FPGA emulator, compile with " + "-DFPGA_EMULATOR.\n"; + } + std::terminate(); + } + + // MFLOPs = mega floating point operations per second + double mflops = (double)(kExpectedIterations) / kernel_time_ms; + + std::cout << "Speculated Iterations: " << spec_iter + << " -- kernel time: " << kernel_time_ms << " ms\n"; + + std::cout << std::fixed << std::setprecision(0) + << "Performance for kernel with " << spec_iter + << " speculated iterations: " << mflops << " MFLOPs\n"; +} + +int main(int argc, char *argv[]) { +#if defined(FPGA_EMULATOR) + intel::fpga_emulator_selector selector; +#else + intel::fpga_selector selector; +#endif + + float bound = kUpper; + + // We don't want "bound" to be a compile-time known constant value + if (argc > 1) { + std::string option(argv[1]); + bound = std::stoi(option); + } + + // result variables + int r0, r1, r2; + +// Choose the number of speculated iterations based on the FPGA board selected. +// This reflects compute latency differences on different hardware architectures, +// and is a low-level optimization. 
+#if defined(A10) + ComplexExit<0>(selector, bound, r0); + ComplexExit<10>(selector, bound, r1); + ComplexExit<27>(selector, bound, r2); +#elif defined(S10) + ComplexExit<0>(selector, bound, r0); + ComplexExit<10>(selector, bound, r1); + ComplexExit<54>(selector, bound, r2); +#else + std::static_assert(false, "Invalid FPGA board macro"); +#endif + + bool passed = true; + + if (std::fabs(std::log10(r0) - bound) > 1e-5) { + std::cout << "Test 0 result mismatch " << std::log10(r0) + << " not within 0.00001 of " << bound << "\n"; + passed = false; + } + + if (std::fabs(std::log10(r1) - bound) > 1e-5) { + std::cout << "Test 1 result mismatch " << std::log10(r1) + << " not within 0.00001 of " << bound << "\n"; + passed = false; + } + + if (std::fabs(std::log10(r2) - bound) > 1e-5) { + std::cout << "Test 2 result mismatch " << std::log10(r2) + << " not within 0.00001 of " << bound << "\n"; + passed = false; + } + + + std::cout << (passed ? "PASSED: The results are correct" : "FAILED") << "\n"; + + return passed ? 
0 : -1; +} + diff --git a/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/CMakeLists.txt new file mode 100755 index 0000000000..ec7f83f6b3 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/CMakeLists.txt @@ -0,0 +1,11 @@ +set(CMAKE_CXX_COMPILER "dpcpp") + +cmake_minimum_required (VERSION 2.8) + +project(DeviceLink) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +add_subdirectory (src) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/License.txt b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/License.txt new file mode 100755 index 0000000000..e63c6e13dc --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/License.txt @@ -0,0 +1,7 @@ +Copyright Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/README.md b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/README.md new file mode 100755 index 0000000000..e2991414b9 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/README.md @@ -0,0 +1,203 @@ + +# Separating Host and Device Code Compilation +This FPGA tutorial demonstrates how to separate the compilation of a program's host code and device code to save development time. + +***Documentation***: The [oneAPI DPC++ FPGA Optimization Guide](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide) provides comprehensive instructions for targeting FPGAs through DPC++. The [oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide) is a general resource for target-independent DPC++ programming. + +| Optimized for | Description +--- |--- +| OS | Linux* Ubuntu* 18.04; Windows* 10 +| Hardware | Intel® Programmable Acceleration Card (PAC) with Intel Arria® 10 GX FPGA;
Intel® Programmable Acceleration Card (PAC) with Intel Stratix® 10 SX FPGA +| Software | Intel® oneAPI DPC++ Compiler (Beta)
Intel® FPGA Add-On for oneAPI Base Toolkit +| What you will learn | Why to separate host and device code compilation in your FPGA project
How to use the `-reuse-exe` and device link methods
Which method to choose for your project +| Time to complete | 15 minutes + +_Notice: Limited support in Windows*; compiling for FPGA hardware is not supported in Windows*_ + +## Purpose +Intel® oneAPI DPC++ Compiler (Beta) only supports ahead-of-time (AoT) compilation for FPGA, which means that an FPGA device image is generated at compile time. The FPGA device image generation process can take hours to complete. If you make a change that is exclusive to the host code, it is more efficient to recompile your host code only, re-using the existing FPGA device image and circumventing the time-consuming device compilation process. + +The Intel® oneAPI DPC++ Compiler (Beta) provides two different mechanisms to separate device code and host code compilation. +* Passing `-reuse-exe=` flag to `dpcpp` instructs the compiler to attempt to reuse the existing FPGA device image. +* The more explicit "device link" method requires you to separate the host and device code into separate files. When a code change only applies to host-only files, an FPGA device image is not regenerated. + +This tutorial explains both mechanisms and the pros and cons of each. The included code sample demonstrates the device link method. + +### Using the `-reuse-exe` flag + +If the device code and options affecting the device have not changed since the previous compilation, passing the `-reuse-exe=` flag to `dpcpp` instructs the compiler to extract the compiled FPGA binary from the existing executable and package it into the new executable, saving the device compilation time. + +**Sample usage:** + +``` +# Initial compilation +dpcpp -o out.fpga -Xshardware -fintelfpga +``` +The initial compilation generates an FPGA device image, which takes several hours. Now, make some changes to the host code. +``` +# Subsequent recompilation +dpcpp -o out.fpga -reuse-exe=out.fpga -Xshardware -fintelfpga +``` +If `out.fpga` does not exist, `-reuse-exe` is ignored and the FPGA device image is regenerated. 
This will always be the case the first time a project is compiled. + +If `out.fpga` is found, the compiler verifies that no changes that affect the FPGA device code have been made since the last compilation. If so, the compiler reuses the existing FPGA binary and only the host code is recompiled. The recompilation process takes a few minutes. Note that the device code is *partially* re-compiled (the equivalent of a report flow compile) in order to check that the FPGA binary can safely be reused. + +### Using the device link method + +The program accompanying this tutorial is separated into two files, `main.cpp` and `kernel.cpp`. Only the `kernel.cpp` file contains device code. + +In the normal compilation process, FPGA device image generation happens at link time. As a result, any change to either `main.cpp` or `kernel.cpp` will trigger the regeneration of an FPGA device image. + +``` +# normal compile command +dpcpp -fintelfpga main.cpp kernel.cpp -Xshardware -o link.fpga +``` + +The following graph depicts this compilation process: + +![](normal_compile.png) + + +If you want to iterate on the host code and avoid long compile time for your FPGA device, consider using a device link to separate device and host compilation: + +``` +# device link command +dpcpp -fintelfpga -fsycl-link=image [options] +``` + +The compilation is a 3-step process: + +1. Compile the device code: + + ``` + dpcpp -fintelfpga -fsycl-link=image kernel.cpp -o dev_image.a -Xshardware + ``` + Input files should include all source files that contain device code. This step may take several hours. + + +2. Compile the host code: + + ``` + dpcpp -fintelfpga main.cpp -c -o host.o + ``` + Input files should include all source files that only contain host code. This takes seconds. + + +3. Create the device link: + + ``` + dpcpp -fintelfpga host.o dev_image.a -o fast_recompile.fpga + ``` + The input should have N (N >= 0) host object files *(.o)* and one device image file *(.a)*. This takes seconds. 
+ +**NOTE:** You only need to perform steps 2 and 3 when modifying host-only files. + +The following graph depicts device link compilation process: + +![](fast_recompile.png) + +### Which method to use? +Of the two methods described, `-reuse-exe` is easier to use. It also allows you to keep your host and device code as single source, which is preferred for small programs. + +For larger and more complex projects, the device link method has the advantage of giving you complete control over the compiler's behavior. +* When using `-reuse-exe`, the compiler must spend time partially recompiling and then analyzing the device code to ensure that it is unchanged. This takes several minutes for larger designs. Compiling separate files does not incur this time. +* When using `-reuse-exe`, you may occasionally encounter a "false positive" where the compiler wrongly believes that it must recompile your device code. In a single source file, the device and host code are coupled, so certain changes to the host code can change the compiler's view of the device code. The compiler will always behave conservatively and trigger a full recompilation if it cannot prove that reusing the previous FPGA binary is safe. Compiling separate files eliminates this possibility. + + +## Key Concepts +* Why to separate host and device code compilation in your FPGA project +* How to use the `-reuse-exe` and device link methods +* Which method to choose for your project + +## License +This code sample is licensed under MIT license. + + +## Building the `fast_recompile` Tutorial + +### Include Files +The included header `dpc_common.hpp` is located at `%ONEAPI_ROOT%\dev-utilities\latest\include` on your development system. + +### Running Samples in DevCloud +If running a sample in the Intel DevCloud, remember that you must specify the compute node (fpga_compile or fpga_runtime) as well as whether to run in batch or interactive mode. 
For more information see the Intel® oneAPI Base Toolkit Get Started Guide ([https://devcloud.intel.com/oneapi/get-started/base-toolkit/](https://devcloud.intel.com/oneapi/get-started/base-toolkit/)). + +When compiling for FPGA hardware, it is recommended to increase the job timeout to 12h. + +### On a Linux* System + +1. Generate the `Makefile` by running `cmake`. + ``` + mkdir build + cd build + ``` + To compile for the Intel® PAC with Intel Arria® 10 GX FPGA, run `cmake` using the command: + ``` + cmake .. + ``` + Alternatively, to compile for the Intel® PAC with Intel Stratix® 10 SX FPGA, run `cmake` using the command: + + ``` + cmake .. -DFPGA_BOARD=intel_s10sx_pac:pac_s10 + ``` + **NOTE:** For the FPGA emulator target and the FPGA target, the device link method is used. +2. Compile the design through the generated `Makefile`. The following build targets are provided: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + make fpga_emu + ``` + * Compile for FPGA hardware (longer compile time, targets FPGA device): + ``` + make fpga + ``` +3. (Optional) As the above hardware compile may take several hours to complete, an Intel® PAC with Intel Arria® 10 GX FPGA precompiled binary can be downloaded here. + +### On a Windows* System +Note: `cmake` is not yet supported on Windows. A build.ninja file is provided instead. + +1. Enter the source file directory. + ``` + cd src + ``` + +2. Compile the design. The following build targets are provided, matching the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + ninja fpga_emu + ``` + **NOTE:** For the FPGA emulator target, the device link method is used. + * Generate the optimization report: + + ``` + ninja report + ``` + If you are targeting Intel® PAC with Intel Stratix® 10 SX FPGA, instead use: + ``` + ninja report_s10_pac + ``` + * Compiling for FPGA hardware is not yet supported on Windows. 
+ + ### In Third-Party Integrated Development Environments (IDEs) + +You can compile and run this tutorial in the Eclipse* IDE (in Linux*) and the Visual Studio* IDE (in Windows*). For instructions, refer to the following link: [Intel® oneAPI DPC++ FPGA Workflows on Third-Party IDEs](https://software.intel.com/en-us/articles/intel-oneapi-dpcpp-fpga-workflow-on-ide) + + +## Running the Sample + + 1. Run the sample on the FPGA emulator (the kernel executes on the CPU): + ``` + ./fast_recompile.fpga_emu (Linux) + fast_recompile.fpga_emu.exe (Windows) + ``` +2. Run the sample on the FPGA device: + ``` + ./fast_recompile.fpga (Linux) + ``` + +### Example of Output +``` +PASSED: results are correct +``` +### Discussion of Results +Try modifying `main.cpp` to produce a different output message. Then, perform a host-only recompile via the device link method to see how quickly the design is recompiled. diff --git a/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/device_link.png b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/device_link.png new file mode 100755 index 0000000000000000000000000000000000000000..18619231fa33f3d194fc1cfc857df1ac3f83a36d GIT binary patch literal 26416 zcmd432T)U6{4N?r;h+K@QIRGp3QARaM~;OKf_x!U*Gz^@0WKk3^ds;@LT`@ z0BqV?PmKWpCVl|m%*Y?ij5{?BhyO4xX9A5i{{qyG^I;i@b8Z^?8UR2;(!~>p^NcjB zpO!@+0B|Mf_y3t?SAkamfNQ_@Qw@_>w!7%aQL(wmc@pt&R>7(1i+ADHn#mH+7CyE& zUDGamx5OhfW)|__#Z^cLjN{{rw7frxCPeDlGF?}rmo5I3>k)bG{FL>t=$hL)4^Tpw+pvr}NjHXPM0@y6=teS;9-P4-KQb1II`6LKpHEB0~z?z=edk z8_W5b`HEYkDsJPT%~AaF>xa=K42?E0+H^_}hg|9r3a8qIH<8upzq%loMoQpJtt^M= zBf;&*<74Wltbt>PQOKk-^UfIZtU#_Cyj3P*qqs}m@Uv%vpqrCtC z%V!h+CQbkMl06Q2VX)AHHprBUM;tiYYAk(onYuxwf#fT8&-VXHT~Q!Lw1mDm>i-0` z!4AxA+}+c(*S4|}C@Ad)?MU$nykT$NIk0Iv=KLrS@}(qon92l_u0TYgmwt(MOZLr; zAtCI5t40E66^8y$n&46Wsuou9sF2=X5WVzswXU;YrTwUCVqiljkdpR@tDt**VCEh% zYm>Bw$him^o`qsUIQ)_PEdIu!;-H+CIYUeHB&CJmXb$Xs+WE4HC z{(73#VZ*%>ur;}+T>!T!Fa?Gm4s2#e{nBNO980@MU>v1Bo#H2cm_J;BJLpwf)v0uY 
z=Uu${GMKF3aY)QD!uY7~1r^Gs}?}=q+!ZpfGRnxpPC&~L3qMLWo31SWTc|ur_Wd_s^ zf)n4o2ag7#;QElWpbMM_u8G}z@L z?(cYvteyE~KQS>xs);(&wcn$tCSvy*UyZk@Ku($t-k`F6JW4f6_SyHt@V4a+%99MP zg5$7N9h26YpI?k~zJDQe70PlfyeIi=#Nvm|&<4Z3-!M3BX5UuftsE9)*BxS2pra%-r;AI**#_5ZLB0x4b6|mf^0J~N817t zKI*10D}J>GMj49+VrqbG`s$D^akf2X6V+ysQoca8H_taBhay%*r65SF#^;H7?uH`= zQ~qi0?dq251@)$%IWC%^$ERFcydyKGSwSN=r&|yE@|21lLR&MJpDlrLHBAuaFMkHD z{Bex>3MXt~l`-kzYq82-*G=`U;-H5+HP|IlaVuzW=T6AZRFsTMXjuax)DUSm+5PI? zp2DPPRG4ERu6A3Z%=^WJwN0pxW-in7=KVYc$`d&>C?=Y^)@p5dxF=1k3M z4ce!c!cx8LCGnOUGd{4f16R2s2=nHx?VdlwCRLn4{W>wfl;oe8qNRKTctfuRZhyOlJ{sQ5DRpsz>DcVv>^USF2b4&F{j=&?J0;=n z1sRCi>ROq$uB~B3^!P&%` zgI2UUE|~FrK#Ztsm)ZJ{%o|*+6)|0I%?Or-FYfYI27(NQiF09RKiBVscp@>KVjTCd zLK8(A5O>OlSWsFz%t9;FWTWkso{R)&F27gqq4!O$pc>=c02PNSBZ1oXe8EaW1efS8 zC1~zVt?8Fm6+=tfB@@OTpp5TJc`JIWV%hiRXjhdQR-U(W^~pAl8gzmcZUl-5q3!nc zsp^?_s_pafn+?Lls3{bsHtI`mVU$(eL`3x=Fb0S=u%YB;x;P=a#V|uqF7s=`b5;Z! zx$$Vx+2VMS=X>8a*Uo z9DQjztp;r$=sTdp8uE$K(_g#1{mo)wb#V1boap5ymGf2$p3aSF` zgxJdOz;-n!Ugrs_vN9=LHRz3d5*eo?u{}{F;r($<;RR#GgaR3xYTB{3=ey2jHsptCNugTlB>u?l1Hebz8XlW+kNd<|e(+I?`c4(b|N+8F?2Lgt!$qtK3fsvE)p_rHsn6hwXxgmli` z+JfqX%<#fyG77D777#siIS8P+@uO+8uY3P6pXP$95THf|4%BVe!$Qx6^y(9S^!D8T zN#*bH{I~3YW>3Q7V?=ZZc}v})wu26$G_aKeclNp#ll~#v$%9pnN(BB~OvlpOvXKxI zZ2EEBI5qjHuj+r)#sX7l@k8gX_Y>D;(Q*pZ@YoJfGe66>iYYb`EF%_|*G*p+C8L}JpQz*Nbe0p$? 
zxWcmR;;TPwk`cL$w39caHj6dk%cZC5av+M4Z99($ zvA#e1@)BK-`o+qvp-b6Zdqri<(tr4lLIY;{;X2EkwR#AdF`+mTs56ef^Yps zBvJ6EJJE*fzw$!y5gm!TICfp4kv>|=A#;*$A{p7?`v&Q}>x(O|UKm^#kjS^LOY_lTQdcDBrx1o&R}YAG z2+7Ov z@7Q=_0g4fjMT>TAv$zzV=w*$Nz}}tvw*mG{=cSDOF<1koRo2_!-a{giGkt#q6Sjq- z%@n^s+?M7Nh^qCw_9d(%uAtr707GuY`$$V+)DZR4L+d)=vyb|jJ5lAZJ6!7;bxPVB zrw75HTnv}hn>Q#KLZezPhd#~;vG82aAncAw=BQI)Mg-;{V`Wz{PODEzyKikdcm13w zTkKCy)OoZ(BZto2(?u;-WW;JGer`Y_n`0YtNxVsHLdoa88yAa&${yVpDf&~x#st`w zpV2*b5wVqN(dsKQ6YglWV-XI1f{Q0&T3kMJ88p+|f1ZqN_Ew5UEOy>nd11-29Rljo zo(eQ?ImD-Y=4jaXL#f`T)N$IXyMxoy{cD8s?zGjYPiup3bYa=RRGUB$vf}2FPWW+Or9ePOQQ2$ndFk5i(!`k=c2a0+; zv)j#iMTXSaC&`gnP1DD(CRNPRU46XM430*`E;jF`kj?fy8>gR1K`2V6;MsOxA_hjx zn6cA_*;|>dHS>X0tDLP6)FjR7)&iR=hqqh#HCS3xl)1-%vg-2sph{|zzI)pnd~m(g zywy<^bSmO8sL6}#vtv*46pHlLhoS+qE@ewLIi%H2<%sSx+B4<))DWN#l0!E*-TbRism3;INaafT zeDECyUiG)4LyTH`|FZ}k)5VFQ(zPXTi3z-7yw3`uCjSXhqgq_o!`4blzsGZ=`48nX zDzS(O)nE8+)2x$nouiVkY7^zb)$Ikh7d5mnoC#=s!*&-{rOH^$+CIi!>@6^GQGvC6Gvp; zARn&tsufOUG3OCC>QKf z_pEunGy;?}7t&G~d+W0{HJWRlMYm3l!_$I=*y3Npv8ryVqNwY%{RR!RSUg}83zH=x z4X^HviZzM(;dy3#`k~ry*r*>vZltNy*?6pj-1h=4U?zxhQq9&q!=oAK@&k`oUR{ou zyCKtz{-VBekqBtr04t@;X_p_=2swZSR24T2^kxVqh&oLer>a|R_v!UO9PrJY%$FY& z3AN0qT}P?zti%^#jcilK=AfgH8)XSpa5l#|;WJdVqh958U6(&EAYZC12oo0#Vg|c- zq>B=6pa$<}xM>{01eg1DH?eiw(vRLoLbgUW6O+2L44X>Z4r<9R8EE^?zxMs)2w(QlL_Oah* zLVZ(6y;><81qrG<``>Kk^@n&XyXNkL%C-&`{U}^UM|T!{v=LD6!)Uh3SLIBK2mZ{h zERlH}asPQprxU=`NBH-mb?r+h>V;$i z?MBA4$hO?igb>bu$pnBwbXS*ZU!hWTYBZxTdjEZ8{+qh}f0nE-t>g|NjV>3kM@k>P z1TP19Q#|142?2xDY`9)xw@P`RuUbO_kv1 zmgeBwQTstSUh$>$&7-Cb7gbur^8Q}VYWvr)jKx_cEqILYIi$kVY z9w9XZK2b`m9@vDX**4r#9q&|;eP9=*H#m&Gl3Fdc1)3;-V3{Sf^7BB!I$yoDk|44Z z3t#|D_Cs{QA4<`jk3|PeHw;s_f!ON?AIcVGKW_{B2cOSV{v~5>_{n9q<}1|8u-Adf z!QkfgyuVX9Ll0m4%I_=qn=1{Zs6IS~h&1HoHBv+qk>~gdZaP(!z+bY>>PNb5ZNW7o zf$@8$0G{nE11o-qa6ymTdY2GH*+01zSv=#vJoL6Fd(3_G#e7hX!G9go){FA=KC=TG zdmP#3#Q?ZsSM#yj{fcsZMTUDpDl&b4+1Yw6$b#u8+)8&6#J*`eRzEoUQD24ob z!K8e6UsVaF*DTaVr*c(=tkb1N#31X$QDP!46#vr5N4WS@2BaKsYEv5wE0D>_2{OxH6zbGPEC 
z#tcI_Nip8y$ecd1)YC>i$z76lVF5)&Rt)0^Jge^oWp}#*QycSz5N@###B4+dPp#oz zd?pOX(89~$y4|{wZ7Hg>!mEEo^Ou9B+)0Kun9EbL{%+GNk=S?{3k098WEIOo7mkF6S5-5 zSIMDSSWYbQX75Jr1MG$T*9Xq5DDc0`m)X<(h?MQC6u|uXFA-YfmoOU>lf1gmVxYu0 zh>c{k>&tJqHfJ7L<>WZ1EW2)yx|tug_nF$ zEf{54QOV8XG3j$S;Dj_BaQWyHhn;M8hXlCDg0fq3Q|1-zOonME#?{}Pf_|}5*rU8vuZKz$jNxEH*H6%K z!Moq5O*KsoD-UTli0gY5La1byQtu|0laDv~;A%~4FF9ET%)ym!EbI#AbfS{^dVG}+ z3Mrr6<|gAcS-J)Xx3ts@PxS5~T_AUf$ja17jp~u-{&M={VC?0EU_E@mUekyE81)DzDRZ1;Wt$+OZ-y%1!FkY`f~i!sqL?o`hn2^mt+gW z0@UM)GkE|@i<|G&J)p(be+pd-IDBEQrjiWw!(A}+BkP1#`c$45KCvY7@L*XT##se@ z?t3}rh&2hdU=P>gTFQ6R40{s2wA!7D$n>KPyy2HsB_72ujZbg~^XA!T485IIohymM z=i9Rlr>3Bj`>=wY(BMNG!GuIK+n1`ueUML0@XQ<9bdWOL-0w-yaS6Q)4 z!Q?2)xg=4*FvvM)G<*Eyl^{ROGC_!{Eaes^CB>f`xK_-a=%S4#I0z=0okGuE8@@(#ju_-8P|$lYU=#ODp~!i`DU4cr5fiUqUts;^RU zQe>wy|E@oKb>rI%mVka|>+jxcoZx$Uby99^fx`PO??o46fmS zIt7@JyI5AYg{nMgCt=fRu2AT?TqQE4`8Zh2+SO)f3Rat7>OBB*szj0K@2&4D3tJ`R zX7DqO;6dHqd(=?fU7YFu-%b@&u#2&sv#I`1#m+E1uHL&~n(63O-b;ko=a`HRWJ8#0 z87ECjjvsqQLL2F!X#+`7MF00W&vEud5ZcG1!59kwK^n$u&u!kf+*j0QiTI;pd}vd?w@* zg7lZT#gA1#3XK5FxprV8)20d#736=&HcME50igDzrAJMVCwHkYit^F^)!X_ZDZuP| zN^Las0`D6J2Xsb(K~z2XOo$$PN)q|Kw*EhpOpJ&AClpl3gfv3MDJor_+eCpL-c1S* z;Q@Fn?TWAxeEeIcYxXj6Ki@b0*j8U&4wkK>vlvi8}q!^jkNh zdI7u9C#l`17V5DsjE7J$6kqC}LASQtSfcu@qmZ)yJ~T@vWFgp3TLk(V8Ev+!!NB?q zX#PLni~fI-E4*CEEAZ%adyD?MV<8D~I2(0qBywEEZ_Pt}^`EZQo5W~uJLJk#_~H@` zLO!8~k3N7s_UG%-evW3gPrV#uuSrWI+lO5liRFOLAeNN^rf60Hy|g1xmK{A4Jo1xw zlUseu{CvXOU2!Y>+!t*%=^X9!NrbB79weqP#5)M%`^k5k)~5}vNs|2mHO0POug9pd*5A&A@M*AwjPUL~66pNZ+;LxcV~W+pP$D2;mUDI1 z=2}Fb2lN_1m^+v^TD%Q>>%^_+1iu2x!hxXQlY9ia7D->*Ad?ZEd+DOPyeI#3j6{4_ z!Pk{wXZl!tBh28$=8gy3r`AdxUceF(#z}&{ir##QIYOq{j$N#KD8v^1VnYf6ZN6G} z_Bx!fzpi;@m4xg}Bf8F%G5E~%y%PD8H1Y}tIk$s}HBn z6OV!2?zQ04N0c7Ft2U4QI=}@O~HPUy=F&h}H(2>nB2-RHo%T|`_vk~C(|p$=L7C6>CfJ^~`YemM@W@}wPVVh7Z4cX;Oui}An-Mux)tL5Mml65ebVzYOM@bV zGkRi;u_so<8;lZF1=-dS_Ygu_pMZ78`C(qK%HUceL|(LM^TE!FZeas(-w^#ONxjlU zOz_}oHmqd+y%3hO^|hpMNlq$lY@eU)j@sca{z4_}%5k+~I;;fSTD8F~;YD~q$VxiF 
z5r#;=GGy|juoa^={oi|eA;U`tv649q3c68OA$YAW8%8(T=PlQ8&k;C%IcIu)o_*RU z6jy_~(h-pI{d9}2#guIoc&1n9a99_yd6X*=Zh}aD!Qsf5kAPXOF>>#_{U5r<+PZ!6 z9W%C$y!`uTnot<4P95rYT0 zDYeYY(gA;o$-R*oR6N|R0IEI0h@tLpsum-I!+Z8V`Fd@>38fp-2F_>H0M$rj>gXEi zST+^zfv4^H5{%FeMK`|*-QMnD51myJkb`hp%&y8}B%tL1eit`z3XV?qX^*5^>Z{9yZS$**no=nrJf@@nDp( zStU?nUirQSu#!4jVGA8lBh0GpXn0?L!;Zg<8RL_=$$elYQR^iI-c*EOB3qe?~ngf=Fb9&;*@t<1JLQ_Wxk@-g5tTM z$eP&0_qA)^FCNYO5SZq2IxgxFSFRhMx+O@hx!hGn`~a_BtC|U()&9a#NoW<&Uo?@o4IPz;g>w6dVt*#A#*b^mAVI1DuMKXmqlv-CzWk7DR!g;J8IxuukR_RTnwQ(7>j_Fl;}FR!NQr+DfZa_T?c@kh8%K0X0cy%=P{8i)Zgvb4 zefkU@{@<1c{9AkdmrdwNDR-SmH-Qb0jwr79ioH(k`~floR;->UBpsQ}7Zs`(hmPRk zy65gt005XXJV^lE_y2_^Fg(jDa&)g~`(Mg1#1#3ToZ-|@LQY>Z_5a2h0&eLbf+kM+ zQG4OtblcOR)hyqwZ1%2_?t$Iv&vFzFB* zCnT@-Eu)T@#QTOlf39V-+e`aQ=CiA6+}yix>w0lS!{GEKszY0T?ScR`#zjd6?Hm@B zWmPM>Hsm-y{UMI#9cp11Rc#ZoKF=#}jxa`9#znMMdhn%bfbR#&YCCkK!K7V?RM#8p zU7{CVhYnEK+xydFG@}es%w~9px8&eUh3WLV-JJB%Oqv2;CKSEUBQqFp<=ZiR z77+3TrxB-$t@cZ?*a>gF9dOJ{{NiDgD7V*_T7RkaSBlKJ#^Px%WpBTQVi5$Hxj)$g zsn#PDaK=LgSBCZz`jnV0L{;J$Jb2g8Rr9@pThlEPnEwtt=ID{-I`G4sr-Am$H7vDM z8S(9sEBq14qrTu?>&&-y0lMn>7>@fXwazSbUxTaET3sbQW~nx~a_TtPX7c;K9U`v< z!=F9g4mv`r-{|ItB@TY`AZuAvfNw8B{9IPHba`YjXqhg3vB6aSoU}S-zWs$g>HMhK zgYrmDuRCFa+$7v=CrhZ}$2C_W4TDNwCIG$@{i?iA{%LDP++(;=vW#t7$Xh?8qjV~> z<)YAJ?G^NVPzpKZMwUlIvaG5buUwZFkAh~XBL*57U0(i#^}<1xYA%08vW%E0YF#oJJ^fZ&jS!6L^YL^jR;hhry>4SQA)ptT8(^{v?@1Oi+=-8^#Hx8D7U(!5?itv z1W6ZWwk+R!H?FnvP}T{R^3i7~b|kWCqttHOP+e7+70)(W#HGEs>8zHl8(#cNS{T!s z*Bi08A@g&H5OYF`>2dcBb!-aObEDPxY4WrkC2CsXFc02dypFAZy-^W`>ww*s(sL9j z1po|koZ6AM6OjIh{OHp2s(UPK#)iCei~P4cb_->jD})Lp=(Ppgu}uo9LmRwU+kUAT zR9bT^mk2p0vXN(je{@B-$n`3TWUJ&Lngzn=w~b#W7$3yd6#^BOdl_0U*agJ z$?}9B{-u4! 
z^K|CmwWb$+1Uandgfq8Kvu1;~9F|Alp{=ZF@rzm8)?Hy;_#^1a{k5L<~zZL5#4u?)jLnv|twpA$TaFQB6b z_@#A;cW|@5E>a`q50RVu zv-jVK1SS9BJ5b7Yj=DH{>vERCy>9_iknOj%7h09m*}kV;H1~v=k4ej8pv#rGqcTL& z-PcAh6Gk|B?@c{-J?i=8Ec38y{fsAfhK!Ek1)EIW7p%C-5u7jYk%uX-?P2z%Z#0&Dgl=0q#tMg)m(!$HbX+86ipgBo~%AvIqn z+f~kZb6N>&nOZ`LtbpkG?INdad01M9GDm!R=8(@z-f3hzlmL~L^_N209ws%Dq7&!$ zF5<%LJKbQ7+cqCNRIhmZ8Fda<0!_EhiN5XJpg7HKpvDD8PTn&dWXF-Le|uX@*KnbR z;C#2XB}z32A~tS0)owPZzgo(M<+0@(%rtcJxO?xjbAZQX32aJBF%x^GZF;$XQ4NKK zH6raJKQ2oQj#`y>>hG!*aVWb9&z=zMo(d&?_k~724nxz1q>sk+9*H;Vb8_JGnRlOG zBO|y4sxMVRKWZm^aYXZoYV8q)E&ls#^c@Q!JuPVJ-E_ zvN63lZ+{s$2}d|AK;KRfRY@rpW8j6Ab9c7g zUx&Oc8DROHx*Swp?e=_?x4d4K{$omaQ}^^BlXioVv-o`CHRm&?MDPp%={_pNUv9P&wa6%_~9G8Ymp*(K-ICxFV)`JH6E|Q&7DHtMhLFQ)&_QHQd*DS2D&8 zH@!cU{$yrFzVo3f&-^(e{+5fGUTO_8;4m=_)L`!;4|TRuisG9#)ov43lyHuUW2iO2 z%C=$Lf&kZ?6v(Py$s{THq@mSXzUIA?;XXW~0I=e`nw^-yb+SXaoAGE@BKZS{+$ZXA z>{PT<*e`_68I}zBwy(L&XNLTDYLzs9edGW1b}dM0sV!l}mZCe$$yveTKXL7|ybdAl zAO+Kz>@ftD@xu-@Erl^R6+dWs;x)ZhRRF((Dt8#)WzAP_$WH6gU=UatLxVwg0rPe5 zKe|rcT26AI?#~sZeX6!1M7MX2bDtb>m1~Yn7!nek6=^EzGTR}gkjbK_iwKFa)w+Zv zC0C)>&ewa~dv3EG8th-(bkhed$~U-bwmFbK$>CwHeDvWCeD|_gMzm{oPhX92!Qs@Q z-d)uk{Ah_!zOZg7axWsXv%AM1WUTv zkUh)Ym{V9_%b%U@kRRq}oYDuTqWKN)b*H}LKF@vW^rlE$8C~n{KiGS?*`R9yxv==v zS$?_CXf=Q8aOBIgrM(;_P?vQ(pO3=0CtX>oVYDqO4l=ZAND2$DAL3nT~O_a z)Hc{mogbytu2u@a>iUsseZEDQ&FAsxkAoePS9ko)P#J>60abZJGZllanMgyPXKRCH zLqb{^82P;mWtYM<9xj80Yll_Dt-D84IbTi9bL*|y!>Upa41hy!S{eKl71$+aw28>D zpf#byC-K8Mv7@W%>f)vYk9Ylz2>GCn-UwVc1^?YhWhDAJ-qg=SFUgUUq*N}b87jB^?{=(U$gca&6$*ug8#@~EY!LFP_KF-R) zOXHq`KqgZ*y_hwt1t(4R%3|hwkermYXLd8NUBrjrX0Oz%Ix-3R@|{(%=c<*$S61D0 zcjQnly5*?u&9+B3(=+h@Ip3KMJhApEtxpQi8qa+LzEfJgx9_S&kT( z_)7tqyE(q1HnrWy5}NzgdRX(u;Cx?Ug`VyDz+}7Gwq@4K~(_S|TN4WwDdKDN;5S_iz{0tA3gH zc7Aqp9U>jMl!ve&LzTuPs<|AWmsfzFoj|G5U5f$%Bj5P7Up5vtSCyLxbiG=UyenIT z@Yt4qh;2RkAmm{`GOz(8xXYt5&hZ?DhgpkdkygF|Op#WSqPm}q(6Ye%JOQ^tZ-D<~H8e*gZ)ZS(J=3$A1StBi&VDbIkl4V9TP(cG=s^wJR@m93@~Xh)Cn$OVc;GL|?684HjoE@ahmYu@Dk1 
zGQj3RgTul2^Zhol!$!PxjeMymBbv>*Y|nJ@7T$nmK|Bbx>*gJ%5B)Kzy>_ z>!L|;-31R|mHKg96k>^%blPLM5V?3?W&R7pF;vgP@UY~1lqvH!c~Jg-Rx~8amI+Qn z^$rz*RSb-K7&5EhlGz0%yq9ex_>S_tGRJNZr_!*`Z~4`{`@K6)c1`H^+fjMcLMM{eN+@fqSv^#LQ;LNV!n`}X3 z@5l!pDsia#LfK*{WFbTD`90tIApX=+YdloR^}23o*{&y>LTST-_I}WBn&^{H5R5-} z85$uEEG!5+9Qa!ptIn(a$&#_-G^!If6!`a0yGK(8dV0Z0l6MIU~8R$<b0j70XfOClHi^TI)^*3yFfHc&u|Sb%;b=4!*|k* zlauVb2U$%ykr4|)w)J+K-XQVN>W2MCU46}ZRvRQckM0Ghdne&GdOV=?5!l*yeuQr} z-h7B-6&x9PxqtsbcVhbSnp&o&@>repiM3eLyOT(*{1`$S)jl06 z;-dbb{e1snR6uvvyBjIJ2AOiXC79mWXHB5rip8> z!ofj6Xave{^ovi^AzWiYet4}n@@;}l?(m~KR9PIH@L&6O*t)*plQNes)0g#}#}AyH zmE^X*?~{aCOs;b9U{>9$g^Sxyv#5C=8PSh>60w&8KFql`7^gnw*ii6w@4#Fn*vXlb=)Fb^Dt8{=M*k0uQUQ z*U&>xRg7$;&os=;TQOmh8yW9x>7pBA0M#M$Wxd){8V-vQ8_SS`@Dx7la+!KzIZMb~ z4A!C}D9wu}EE~lM-+!p-Wwf|hP8Nyzf*cm&T`~tY8wS$OEmEiEbrSs}pF60_A^kA3 z^--v)Z9bho8LNbgH?Aa~*l4?ZOR6}d0ublt>A${W%%R?PQ@0D5#ha(Y_T^LmP_Kx1 zTu#K#mRZNXb+fTnD(jqz@Pd?z?Yg;TM~K#NwonbR93U1245N&UF=_vT(3>pCwTd004vp;bO3S>vU$`BrYFpIlu9Q|>)u3N!;a!#!H`%L>WYBJtt|jIk8D)NVbjwh^B|1Q^7JLoz`&yK7WH(2rG&zK+;B`NN zF5`Gg8${N^+Jf#fa8K8h3~yZ?hkz{7kJm$XG8vEiAV9OODI4}SeHdHRTQ!ncj2_Zo zU6f?8;E!V10vI?-=1DR&ycySM=NGzDMxKqoAC`H)Sr5)N5&vfkR#6P-d-T=+Q9{`& zUM)K?ELnq49?RwyIk>cG#WI$nF2K4PQdWi0D#uU#m3bidsHT3G~pf*N(T*1NL$;tOIY6vh<4 zzHgfw``E*x+b+XGv!?Q0>NR~t_AiHb-leRo9%mCosL|>V?otXc9gA#DFKFC zir1xo&$RY+;j-Eu?;H>+FZHPa##V!jGs62mksLaznL%w1)ohmI^q~(I;zloeG(UkS z1*uxZscGeJ24=voG$???Jgn2^>cg`_II`#3I@3sRJ?`f;*=1O2<`1fIIL~2Ee!49ly1ESYcQw=g&uFC~_JPwgH<|I2p!4W9WQI zOYV?E9(9k4ml?SCD%PeO{~Ap$dG%u%z+&&kP&;!yc0qM#qxI)>A)Mu|w^jfEdsT*; z;&1uv)5EX3&=Cf@iC)wE&z}tlXIYE`^cXOtDuD2L1mnZ+eg`brS9D_aLHmUO?+d@f z80%j$tlwi#{_`UY>l(hOE^^mv6C6bX)3P4*txjBGW>R+@MT0RukRQU3-lQ`oKG9}J7uWq~`__PbzhA^K zx!JRL*#GKb;y7;TOo~_+Jjci2@U(uL$-aoRn_vCFfMv~Z8Kjkw=^19QBBOQv88|G& z{D!k%CIhqaaWTAVac^J$XVz^-Rvt6&oL!Xazt1+TrgsdIyXga`!<`2L@Qne0ccc8t z_TgqXJ?^wkfT@TP{!*XKhz=nxbO*+C?B;{^od9wRT<~9!#HQ1eT*#$pMvUkGy3LtU z*#9&^i|CKl!`zOJg0^!T3ZcYuhOL;m8%y`g89i_bIN5l_A3|h|w4%46y+(}s005t_ 
zW`AAp(fcu&RCw-+310W{BzN}E*VmJUf&UZz(&6^+E(5%hmQH_s>%G#I08{_h^IyLX zPRkz3YuXg}UH#XOZTwOiABPTsx<=t82MpuNbM*)ufg`~#Di)*JIlt-)#U*jrq?UNY zxl03VvrW^jv$rumo;)lr6a2&0`o`uG8+&>SCva}&P{QyVivY%1GP6R69jX!>*rqy6 z)gX3Rw{opN;M*CV3mMbd9D(p9>Z12#2&c6`eC;h*t-BMs&TV{=kf`UjBv18VS^^E!3QGA~su|CE zfQ8!xthtZLDa>PFMlc-b#eL^Ep^d6DaS^Rly{$soE)UGNoPUGeSIwIXo~F8Z9z55GQQ(Vs++h13>SBv-YDAWSTZjRtuliuOzh8wG552Wm zona~-YeJ8{*$?)>W+(HV^L?jv9sK&a!Jh9wmpWj|FBm^V5C@t_k1Kkl;FkR?p=X3N zG9u||QZYintX43l`2W?)d55$4zWqN+scPs_pBmMnXw8zMw8n=TtB*fK%4S%8}J zB{a}F%npYIh;fT=BmdCH7M6JZpk1%Q>?ZLd=Hxe9JJTmwUF&E+u{+-?1o>q zFGhj#(W_g>0Gl<`dL>!jECR**@OE39?6AdlO5Nle1y(bA6`L))ouP9b8wG`Ypc??`LlKxNlMtXJlzHg`u7cHd#^lnbCg`!K!w>G z5+kUJK;y3?RdJ!b{+hAPpnGU^u-^{t<9r>=enFP6sl78-fWnnnJ9X-UoGEV;5|qQi z_9e7xk?;~F;ABpzuiV00WZLgbGOI3gJX%R(hj$5ghZc=u?;MMA1I*R%^xXguxx#;r z(REagG~LFkRJ!vdJ)JHPe6wK4Y8+~EIDK_6@q%;g?Bo1YS#wXM$yTf~+O5~McmG7`v;U8@#sb5xui;_Gf@zP<% zAEb-~1maVGvXKxerj=B9$#Kk&&>$NZY~V(k6cxifwO?lIKAfR*@W>H<{7v%L|IG9bUk~%VT2@8W> zwx4=*?wVVokcmu8uH(3b=pPqcttw_|R`;At{I3v%&U=Pi&B_VEi`UgmLD%UX|h)v3}n@M=0`X6)TPotVo0mAuo_att?`;w!MJ zeckme?C1qZza5 z=g=@+ceY(?Jv@#rSBTW^H<4`6^hcq`$qhp=@+gKxyEccN8O(5jKa*JXf$s6ysy2i+rZ2BhV z^M^1Mi6Z=lLwTCJBTIk$T~j|*0i)OS;dl{`-`D3A3%`pBdI%C9oT<+kx^d*I#wlz> z)s`Mt!HZYIr7vPG$d>jkkiSB7&*q+mzZt8xGQ?(-G|~A2Gf)0Z!E8LL)B|1I7to>*D-TCstToO$vL=$gbDa9;b+?)>MVLKO zs#cD`=jcvl27#sl@9|XKswg}`i1U$ns$20{t~N@rg*%7P;9ny5%|hXdcsVh5=sUgL zmFi_;+-c*&SM}o$!1Jx6jv6z>tW&5SYj{a~mm24fI9yvvp&Lj3m@ZDF=6bC|banj! 
z0|;~%xWJ|sv883_*W{z4sg^t(HxHSFv7JMKTXHZ%V?K4&kr>6a4| zqgGCjCrp!yE{oc8&EwAxGDR$*U)OEVR*O{@M)lJ7>|kh$O7^#i@d^I9Qn(BY!M(f!DoTndAm0|d zCfNAk3E6E}?;6!AC!>^@yErc1&*xa*m3a``xZVjT-)aW}ZC)mo@eu^~d4X`La$>{= z`?#PKS#2+TG&k3tnI?1nnFn>HCYcSf79>QINk3|qHssyvnm6M+Qs|J%co+@&rr<#b zG{6;o@Ckjj(<%zA`i{=)q8h#~z?Up|%Kr5Oi4+O{_bV^JC+ZqUWIqvZ3 z#F*RvI7!7^X9g8?1^W@eh(K9?zqfX+&AyE<`*>dq6UdCXy&DJp_D4d49=>*yYaFFW z%t{~MBQ5nLzHaCnt4Gu+XOS5S@vzsaZB8ITIwf37foCwoDd{kG-|a7$P0ut!_#x%= zy7RoX70;@eu})272|^!}q)LHFIdM*0&Djruk{-zI@8*BXx6T5+*Q?g1dOr^0WkBo!kEGq{FiwmXmIE^paPF2|4)& z-iy$H-uvrC$io-AKy{Jeb{cjlL%Z!XtnGA#y7CVt$~S`F`uuj6?L{1Shxx-OG6ZXJ z;iSa)pz%3OZl{9(Rn3i2Lb}RZrl*6F%$+-m8<5bPhGk!{4BxkTJ}+5&Zvw_A{Beyr z!4OHFy^?3OdqRep-s&>ztq8NavEf29wGj%o%1p z&%1S`vuZZa4uhsYQTBMz|55%+@It;M4{MH>NOR9{Ag<;kU!#VLjR!_JuUXS2$Aoja zI`A4SPx@PPzkd9n#+146k)0R6y>VXJLQY~@Mc7#CxvMU4C5TQ*(U|ltz3`oisd#84 zng2@m4Bts!VQVK97yLH6Ox;8+wYZMv06slTYiubY9Aez|l;mfd4)x}YaufXm z-hf+{E0OK?9$O|#a-Ojb@_kqd=g#zuv~Mrf(s|E%bZ_^;J)S3CA#e%HHMmJUfsb^5 zAU=NOqz|??wlP}a{yGM7o$z$lfsn*L?qB+0|h9Dc|07S$Z(Hn_P>T+-e099E)bnLNCf~IdsUeu{;nU35Wh<5gr&fErWN%3(S{la)Q?&P-cbx|$v(l;2GidQJ zD&hfjY_a~A6zr>5RNCXM#M6EVD=AGvgV-?75YuxbuLMua$#9m>V;&Hl=ntM|zosQ; zumXj~3!)*g9;cRguV?aa!j)^p+>%hK)S~6uD2b19*75Ph!s)~rnr}B=ePitD<`~dl zHGbplg6kcv&Bg$aV#9>^>+5o#fhB}hfwh)07O}P<_73SU!Xv< z!7+}l%)$zAl3nZ|OSe1g5`;KF0b#BO+k;e{CYM}`BQu09T!j0q5>wt_$i?EZ;?Rwg6b^ij{a`u!)SEurp{iLBEpUu|Sd z8e5F|u{}nJ6)=qUL7UFcB zI`;{t7}NRq9PF=RtfvUcufkXQ)+lA0sjCoCluz5o8X$6&(II|Y^2x1wc{)#$?l(sN zswf|;ias2zSQmFfk`BMs-Sp0@6BZ0XqP%}JJS_Hntyn$L*%gN2_o7X_lujs&UPo#tFY`Q=9uhmn6=6e2%5DgyAQOPAb+825Q* zt19qkcEGxW>WDmunUi?w2c-}&=lC7)qJsg)w}%d*)L-|L+%VJEXpCG?Dzf}-uC1Jq8X>d2d!#H>q?A8Ks|>Bzq| zaSeT%paX?V1vOP2m>EI70&Ha6IT>u?Etd< z{nl(h$7&>bPiT!9>THTAHVIQmV$5trXV7(QZAzW@+%95ZkT@KHXJ?dvJ-{Y1hyG4%#q_ z7OpePa4}dB3gsd7lXUlR|ZZ)$z#^gLC{Kj*+#gwaifI*;C( zgpNCjBCo5WFE&aMyU^T8QdF8+MzhJ;PFlrX-I-^I@V8YGPv;Hmmx(M#szsTtP)5*Y zzS0n$#Ff3PpS7NlyEOBnGr!)-|9oWQ`6jT4fuU^Qw7WkSH3#AeM z8yBE|9rY}@(m6F4a}Ml#+)i{Xtux4tB&7U;730Yyn=Z>kA?NL@tW-q` 
z=e6sDt%!1>wEwcX57T4Qu;kpwTx0TnoR@9)D^zO@rMTcdhVxE{Avs3=cI3Em0<8cS zgV`B?;Rq638fp8(9;LMdT0(}!U1P=SVkc6Q!gE>}>i}=D#W4+3MNt52Hxst2N2L-D8 zns_YlsKnB+OCol)fkwQdmn&N`%9YR_-}a+6`^O7;n*-V>kz|Cl*R{vhr6@TKO6tE# zTfc$QR>c8C182(kfZzJ!e9=r|tVJss@X@Lq$Xp*#rVs5T@hgo2_Mr5lZ4ywf9|aob zRhjq)t>ao(O#rvUCn?%N<>_U1M`okXI5cl>Op_941`oQcVb=Gu@8Idb%V^S3!E3_$ zYWW6^ggf0^Z};st`eUF|c;f_>g!tQCPoLU0OO%|GQCA^F!7bR#v4JyGf zfOHj%^vjHesJ{#fz$vy2^1dv(ObGjqfY%?+tcwo{{7E-s*a+*{N_A&9y2^#!7iyX6vMDySuErXKAQ~ zFHg)=za(?N3^G6WLA&i2BVvMh{wmG5&=`Obn7I#*o0p7WTjT|Gdg=bC%!4(#p;Nk? zAsS2lCaK}akxaVzQw3Q&1(yeEaxrRD(bZcO3=M2e%3dARHu$>j|ml_X?>so z<19a#fc5W0D>gcKh;?r1i+=e-ME;w7;eeB>mC88s$~;iDN@oii6@5_-w0nd7Yeb;p zJ$yT#m{do~>6jt3B@Z0@!Px%tuAg4k@^D->ok^4!`Vq3G0&fdTi+`MyorAbKSoMTs zz^ZC}6)iKY;g`Hx#(43NdUsJn-r=go+Vtl6t~BMrLR>xQ#_n~mbzI|+%9NjiDIWUv zPQ|on*1fEF>mrrCDkTaV%^ENgYA zy-wd(wfo0g`WydTdcnk9O|?{AYz;sKi3B7G2e$XD>0i?6)tsvSD4_`}yF%B-NzeZU zJ9=&IpLc71K(~M8Zk$`cOVDAix((#P1-cMm@PNnb3x_ZbiLS7l(IlEWBUpx0;atV` zNoCqua5!i@G(@?P=d5Aa3C1KOQ^5p@9#OCnJoW_HAlFkr8bZt@a`c^V5?_7-}Y|R6f^{&oKLoW z;0^;`pQI8XTXv=6Hs=4Um(Hh*YMq;n{13Jfz=r(C_TOA2Aobn)Ads6m7I1*pmY2uTbwH}i;_o}aM{-Y+jQ@>40G#odPVk3e?{F9E zG`^K9O}Pi0APdMEQb5P?=O3AWb5EfE&v97cIdgQiDZJKmJ(FT{w2Y$Mvyk7;*GV*` z{WOJ0lYA?96x`(6`IEj$)T>Qy+&s;D@yIWNtJzQ&|M+obP zRi1YSB?kgBnfXLVLX22|v2H_C^q;?3z5(Xy03Kw@mj{$nL%OkO*(9ku=k@IiBYYF_4>Hi^n z95}`QXm&iwMgH@nYJ#5yX8)L3dLtU%SWb2XK1len0o|>WNKEi#l0@UY=mV^P87$*b zV$L77j90W+X;&#r+;ia-3IC{FG(MjY(e1`tRSN(zRXNvkDG$YCJ~4FAcuvXi0sjWB zw|2qAM-??3tj3pg4wkj>Ts%5i6o6kLlTV3k$uA}QcZ-&S!){$QPgmLThF$Qa;I~OZ8^+b>7x#EZRs;qtvLZ?R3yy|a z4KwwXk2g{s{54gzP;={T*+(W>ex}jfq4iGhr$$%}MhFkJ-il27N*8&xMNHo_g56vQ zopqa8`dLmo+h(bW6*Ysm^GXG)f?Nzan zqc_zaK}>i?t-Lx2#|B!{TkqN;AsM`lt5^QJQ(g;2I*MBmDb7F-2xdOr1)o;9Q3rj0(LopaKw zPStLxeFuyy3VcaacU=-;xXZ$6<588<8Y%VTEcA|}|M(va7fiClzU;=#>qLgDKhv`` z4wKmGi#vl;gaXXcMq%z*I&*P2MWoni4%eV4%dgO7(LIbvP^D444%tpQsMSmMx7d=0 z#YAmN@|?3@6u5Pk$rZ;6-q4j*J-hkdw`-IG#@-!1N;|aDp968<2YMri&d)G)peG15 zgrRe7_%6=9Rmhb2jx!8s;m&rK7u|Y{(ze++3jbtjQr_j3O9=1m>blF@#Pjscftph; 
zi>l46+jD3nw$(d#P{@fCEz$i*9abY}V=!zw<=UEZhtJa&c6Yc}TQ+gr%11WEoYao6>=IyqNxb<}&L*zPT3#Q}}f5%t$7_UA>)XwuH~9=_-KL%69hN)Ln% z004gjf9=Ld@2Us9=v!Rq_9?k8o?JbwD700o5UD!BIa;f=q4)G=Z1|D>Yh5Xd4M3T@ znHVZdsnP;iN^10y>M^-W+S`YA1%yxWXRaWhl0EQqqSHeX$`$_HrL<_X8kfS2n)~(_ z8SI@W6@r&5WIkX~6`YRrL(IS$Q?2R*k$40h(yZ!pucoCt?mQtlw_{5xE5;lHZ|@29 z;Lrco>){_YS!J)p|7G~iH-^?<4sP)}Yh{7tNk%}En7_J;zgoAB4cw6F^ag)1Qo;SS~5?q!2ema+9 zr^$0;H(0qrTtIU}lgE76ax$zHFSuaWa<3t2l?%^8e-_)7*;=?V`Kog5m*VvWj62N8 zk6k9u_r8AV>=OHQXF$5fAtxbSku1O0_e`e~QuFCFPE`CFtDkgo`&Ra|N=?=lk6Tac zSA;9yc-KT-E9CS%CzmxMjmm+y5A3k)HvZ9mtr*1oNoHj{t#x*+(%Y1e4yqn;sawZr zPbsn!%LFp~1l-w``NXI&nf+FiuRA^0hU9oX4u{TJy_Y9Pq1Osa?X|exz!Ld9HEe@~ zp_N)~b9`ozjJo)b1U(;Z#+l%xm1`GgP;6h%*ln?Y?I#u*?Iu(fCosNRPYKA;IQ%Ev zeoCU|St8?U9*ty7?tOD~^%vQsFzyvR|EK#|GDj)Fs#@=!c|5>0KllsmfIXZrM}vkB zN=j|x8W@v9Emd%y7h{{BED+zkyaY$jjwM!5rfv$@?)CUs+Zqtc5{E7B85;bK!LmO< ze22NMW4LX~jr6A*^)Z8e=r}LYl5;zW4C`g9qwYI849*SH45!T=?+g5J-n-$I^^zT` zNvR=T9xs?8S>I-{B#gaJT#Ktf2s#$7S*Or^{aV6(ZoOCsE7i?JyYoRew&bKVnx&R^ zA;ur9?xQ*+@RKf1kh{59QU4AazZ12rGz$FUkEvR+wNMSKMg+!HN<3<&MK2>hIH^*2 zWUfGed{u5AmJ^U|`aTrzB9v2OtWh7{qZhZ?Z&xOzKeNp&=M~(-+VmU%RCP2nnVlXX z{{_-M*O)Aokk|-Y7AR^OS6WV(Fxhx}hMOtLU&ihvh*ncW9VG11E(RP+3LsFp>U3I< z-Ol&ZAT&TH2Z4SsE*O5}GwYRZgAS$y1<{lz + + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {ad7020ee-30bb-496a-801e-a17f67699f38} + Win32Proj + fast_recompile + $(WindowsSDKVersion.Replace("\","")) + + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + + + + + + + + + + + + + + + + + + + true + + + true + + + false + + + false + + + + Use + Level3 + Disabled + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + Disabled + true + true + pch.h + true + -DFPGA_EMULATOR %(AdditionalOptions) + + 
+ $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + true + -DFPGA_EMULATOR %(AdditionalOptions) + + + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + + + + + + + + + + + + diff --git a/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/normal_compile.png b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/normal_compile.png new file mode 100755 index 0000000000000000000000000000000000000000..4903c6f371174521a7f7f793b134561b3ac20271 GIT binary patch literal 16288 zcmd732Q*yK+b=xQ1VN$(K@vptP8cN!qW9i=^k_qLA_NK1dpFt`eU#BlqC{u(Av$Ap zqnA7R|L^y$@80kJ-}k=ny?3oUYn?Ug>~qfE`|Pv#^V`qw`8^S8DsqJQ)c61ZfKdMZ zI}HHft~daI6aVlIw#24ZEeZRFpXDg{J2>?{Z5L}tx$DZSTe6Q~g z0FZe6J#ZFm=)C~|2?6*nztTNuc zC5)wb9*dIrvi2eQtz_n^PI1r!oM#V10W<@$Ob z+t(0+-cK(|=IJ1>?t2#`(m9NM`0%ITc;~{`#AZEpCUs_}1=->cYNOBS{^Y-&n(Ci* zK6o@RH00#ul*r9-v$dDigqf0nF$C40T=(1FT+^3L$IiT6`stC<(FqI)b=F)PaS1%F zxeOeS1}t3k4N5eggqzkd)C6}%ZE=78;&l};bVC3D1VzYgJ^%pz5K0MQ$H3(=90vgK z{yahm0JPJDI%A6#o_pK{0QMi`(qL~EdWggpHB0`tk8}tTT$=cjc_uDATWe_Y%tM5Kvw>a)c_cWPT|9dEDg$+9MVUW|`#K<_Rlcp=4ZP}KO1uwDa?tSVNZUuhAYPlb9M=ptJ9~(H4yt&T z&Sl)@@7xt#VG6&v0#{OJG7=nl-~)cDl?QL}*TZ1exJzx)ZP&=*-CJegSU9rCRQRLS z%E^)bsT1}sffzNtdK2N(F21Rq{n$jVER$B>SEv;=c@C30-G~0;ZJ*ZBa%?^rj2RrB zYcvCW(2XI^dgC=kWUD-NT!*gAqnI`l_B>0C>pO4eD_bk1h=*C7L_>^y&lZksg>)zK zg%v8jL}+$f+M1Ig8LrMqObw0N_M&??MkfimAUr7SFV$_Y4J zybW+WQZ2t;nnL$FKCRtfPdRrJdw)(oNfgyx3zwYt zm$Q%m5^h2Op-a}1FQQ{LZneKKDtI8T0~14w`=dJ8;J<%C7s2>^cv&p8l_ibi!wV*+ z;y=7wO$Ot(;IDXvtN1i93LJ6q*;lm53@81c_cj}cO&5$rL3+c8OiET465$zg>(a6& zH)aJ^y*J7})lyT9UIRyO)>)b%wj=dFgfA z^u2L%(U^ngql-1-44_uJnfJDxP-aT2>-M)&SGq_UI{(*M1~FHwyA4`pCt04?V@_0h zM!ny38TFY1U16_!r#2{mXd6QrG5(E}HJ 
zqm^k~a6VV&px#^bj7;3D+A~4C*uR@g!(Kv}<$X{(p?ML_;ho>zxyw-16HYDw-~Tfz zH9Q_VAB!fV%YZ9$iK`GnISWH=b|7R?>y?6{prOWbIs4ZD2G-6hldg zWpv`8H%*dk2W9m}bAtH}N(($QejC?=V~3wlNQ&u1BeMk+Ux3-E`9RPz8S%DO(d=>z0q&a)?OJ)$dcsW?NwOnRoJJvdp-wJ z3^N-Ji*9FnDos`_8WM|yQ6$qFp?68} zzis@Z;eIn`pM-Vd+2ais@*uR%c14oMG8;ovHo=zXUqX|%n50KPn z*`&C0TCuJFCxA?qgH57q&h`4M{MN~lq>2_>?_0D=NtW96V|Zp+qmv^QZ%p4WB%2$s z9Q5&Kv+Ky%SSt1)1Ng1IEO{HpYlQ53xEEL9X_5i9NoL}HJ=A0GkP$)_kL&8j0OsPl zWv4TH{sxQ^i1gU;Y;;LRp(rR-+`ENBR}?mIbv%7=lTsd!_AYjzsn429Uh>ve*A0*I z74dmSir^_IS7n9RwSX-|H;5Sc6xV#duIZkpa*BE%dmFfv^v-+HZ$CZ@9wA(3vDsK~ zFkjr-^i*3{kNHEOseyT>YpMfxn+Z@~^r(eS<9@I({XG2fbbd`QL{4YmNXQ<3r2P75 ztxTLQEt)OMKqtTl?M>_@jt|sN@LmhBvd$Tbp61+S&WWE2^3IAc%J1P(FLGPzNVF7g z0j~&qJtYw)s4*yEZ{ORJF1dwz$hJOs*Xy}3q`Pfx)%i8XU+aQZLm@CX7FdQ+;!t}n zaX>o-aTIZp88d7nh0=iSNIc2|-Moi)g#F-F6w%f|i5Wd{zLQ@4SD!=@nl1C{nY>Pr zF2Qcm_Uy#|nN0!vCiP2S{ex`FjW*CZL-sn7+p~GmzqBT-+eFgxMp}I}!W~KXJZbJq z)tB(b&C)*wN(m{E(Zav4-me=EA8~rL$$cO2JpASm7LZ8IjrSLAdTYI}av8vbLY%n% zIqfP8STuS*egTFwqaTod`FC+DETa1Im~CCi3h=Fa`Jk(czW1=p3i0UTDV@pElth*E zLnOB+#YxLaKP7XUg;@2=RQ^CCEZoUFft#Zo=Y1B!7a@&qyWy(MuLG}YiLUDX%nEnk zdXLk9DE^6uXXoc7)m?3799|1-O*{L4jXmX+^FU|ol!{pG2RSW=@ziaQUc}_A-wJx? 
zrzS21m#P2FSYcLNW?|~CO2mx+MPm5maN6ARd7@@`HqQ=D$6?fOT$KtmQFZO7wL@In zPL4?KE}Hu6H@*%BXTjq=R!f6y8|9@LxbD`+wDPw4Vu3XQ*C(m&^^=%c*%;+_^JOQ4 z$o>oL%Gn#1sM)AP=ik5_GjD|iRVCd2e4V-e-ekLQ_9t>dZTLQjLPA19B&vtF4di$< z5Oo^;K(!g##H6xHu2;mLI51q*q*=SG(YPWYt!GoQXlztxeLgSMY~&ypO|SRKJHS;^ zLaW~O^!xAnVdEHbL&J|`ecH{_6cDP&eYnxl(UIlbO+jrCugAE6jo!Heq1!Q>ZgM=| zLE5is`h(g5n5%3fX}Ly&lSGG|F3th5C03suqrH2iJLLcImsUSP6FAzi`AD)h=tUGQ zTV^9)n?_v!=`Os#y=eIUbvj#7>)AkO;lytr4u1?hz9E2YI+WLY6J#ZG;=osy;N(e9 zN17SfSe?XZKQW_cwBY5l*1@(^=fhd){tziE7w$}=oznh0`cEMcSCPfkWL%66^UW-5Vp$8whhiqlP1o0vP0WKmE< zOtMD-Q}OZiNU_6%s>XmGB0^)EK{db{wajC<`3H~tX)k%isEt!%oI!lgA{KHEWRN}T z^H+RDL=u|t%#+ZW-Es4@jzx!_^&N!68`)ModDgYal*)tHah==}#T@t8?M8r&s!cG5 zQx*_1JHY5OPQR7M6`o$1_F{PXFnCJk3k$vY_5S1SP^s5bud#q5O?uqO$W$noznke1 z#&$wTy<~dOBXB(4<3;Dgp$>(7Ul1czs zCq*pvzD|vd2F9%aT0VEvsnb48302jWc+QqwA#2yV$QDU0*e?+Icjz}u1;M{&8hn}1 zQFz6p-+lLr6}q=dc;=g&;%#KXrlDFPBrPpf8*iHfH+F$jUi7fVCp59<&Yg9f8N9;) z87)$oJ-Q!Awg^Y`xkodw5ZgAB7=D@MO46rCZF15>?BE$+#5aCs?WuB{has3s)H2V4^j*k48$KJ zJa~B#|E;)-b3eu4Ow((cpzT1zI>Y6=IJ{PJ=hTZz%0^aZTW2?V^WWaW_ zJ{_FgXokl$)B6pZHf%f3N4Ivl6cz>a4C)+rgk_M0xYXm}6%dJ%YB`|><%b9|EPy2V z^=l^Y2fEFjq$mg$T_68Y%!I#dyxe`bAYA0+X!7ayBvo<9T;gPu)OJsH=6&-*$;KUE zc=G3O(Kl$PG@;KQN-FB)xhkuMov!$(22reuRWMM2&By*SPK%>Lh%TY&qLa;5+;-B8 zaw86~{^Q$?7bh>t#e{xQ%;gDu=KZ;rhOG)GcTQ_&W`r>Q=j5My^oRN^0`xN}q36i% zWCy8wo}4bS|HfG>{BdzJ+rK;3DZP%-LbffBxZ?og!`uiz+ae3`V}6^7?3dB}VadXR zu)+Vxnfssd|2isIloA8|4i}vRj@!@=Wi3~J84}=T*D^f)ey)1ka_e_vI`h{VrM-*4 z_jJH)iB>{NC`ZEGJ%PGik<3I1S40Wh%75=g{?9c0-#MQDp-0RdMFzpb(8)SY(NVi8 z$Du@n8p}cylIpxQR8G0Tg7q0np{{8NojoYXwIO~lw*=l-d5m?S&nHDPHNYCXrJBmH41Km3MV%9X zns*UtE)BPRrp*LXrAHpr#UOt-+2)N`$s|7?Oa*A*R5Iy2m6VjWgNFGqzX>YV1&>$S zr8VuA1^h7cNxgi!96vtY_RO&ln!;FoEc(aO~=q@z@sgc4^x~l4>LHis$%T9hxXM#m&X*s;)PH0etkefr}uhgX7TbqY(Fexc59 z0}J4l()Twbiy$X8u*fVbh*?M^yRXWEK_kRT8uZd$#qV>Ul7}@52TzMy{h5W`+!$PQ zK$NJY(Uq4?JAj?pF8y=QYQ}8to#P^=lRh_b&eZIx4B<}>Yvpx>GZDqY2ZXQqMiarc z-N#bM=GWNVQ~|Y1rXXOIm3RAUigLprv%yqvXI)2V@NHAJaIV`uV5M*atLTiP9mi*y 
zL7nN5oxmqbKk##MS{1*K$C;K09>+gdfLZ$hs0odi9tsG-KPh_vv{*Z&>1G_(u#=&- zNf!~C)5ID#ky&38!oKlMxPI+Il8ldMo(0xqUiT%<@+hkjibk)|;hr}1+pON&lZ*8j zzj#KpZh_x>UMqcH-TVF*uR|=Ua~+G{LFANNQg8t^a+{=P@7U73dh@Z{hTx0Wvbr}x zZWmemH&iWG9T03U0JQ_O-%H1II!M0({Cre2u?hV3zQeaC1NK1igJ0)2gzd|Cbx_E99~WGOs|L* z{5oBuzE~&We(I=J>emZ?xQ&ydE?rWcd-5|?=!abk+>>{X;0ep!EXO{m`@GNb*`p+} zN9n%5y2s*#SDI*sVrC0K%axxcrA8RFj~rvM|}1ggQ;LawIgnZ|}l3ecUw#yB)=5hkRWu+)7skY~=f^(3H-)*;mG7P%^i(JQPBk15UQaB7#Q9xXG|f2vcEjvb$2sq#&G}2Uefv9cq&<@-XRVKr z^ukN1NOAF2N@jeC5q-i0db|HMmb zNa&dbSE&>~iij}3!(wT$r~2hQj3+P%hCeRj= z{TFW^u=trEYG#3@i#EJ~O5BKWBxPY#e#FY91t|g$Fm5kgN3T%kQXv zoIg^rc)rq3A^c}$U~!>?NKq@;Z+;m~7G#SBflODr{z4*&hAV3Vla$ z+9w#=QI=%rw-cqqI~CA2HWBrs~$M|`j=Ph;iMui<<92Fj>!3#?n0dMrYO7k`I+J3r>oVF zv?Zdum9Hz?hhBPgJr%tugWB&b*7)vx!>c@7b;&e3+c#`GU602e_2n}W`D8}kueP28 z%BeUjMRu=-bWQx-k~6t^-G1~`k}EMw34Kdylr(KP2r0M6Ec))z`K&_sL=TIN^O%j5 z+x#q%(L`^xd-P$etzu0IG^Z{zRbv;165fZW0UrapqNCfdkGR^-SHYK{M=7ssOw6>` zd)x?c9kIl+tldfrxvg$BPNKO^CR*^h9%TF7ahJa;s;4}jODw=` zrG?q_c0@%@Xw94^sIfdWuEx29u(5t`=upJhMvWl3o+`>(qi?o6HLEwmVJd(4X0lJZ z>3Ce?bkYA>ZqeWjT7NSC_2h$Oc~p!jwNd5xZzrr_61NV?m1Ua3D2e&281CEBVL5q~oPQ_pZo&O0N^Io%8z;K7lvoYZr?C^6j zC!_V#D6%IAiaRd1lbxHJHe!EnpR_An*s0<#c)6MMh9+%Nu{W>CL1<)cU6*jQoxq4! 
z+cXl2^KZ9Nhq!g0Wld>#>D(s~yRg&aIF0e0YuVZY;PxQ1DVP7sObc8>M!RV{Ii!21 z6C7utF_q}HBtM`KTZ8nwP|$%NIK@MfT@s=tZcx>kMe-Uk=s9n$&Af|_^RWe8Q+!Tg z$yaM3)T?To!Y-{Jw_5u^qYXQKJ#DR5D=ux0f(Ppa6}M--HKHTmpW-`yI5=Zq-n;6x zLVwvNE-pYn1AE!UJ$KU&d0*2`Y|`+`JSmgGLtlqUXj-mGFVK~S$tMZRL3&JRvVgO} zCU_b~zI$Kl2T0B60~B|sC{MzmYUMwfiA(3KeWe3$ueOCIDT%Ts5Egz(<(v7JH#_|^ z@ke3IKg_c>%LaaJn^c%E3ePw#?ATmDZSXPi8n|L z-n@-ebgs}T|MJ;X;-=J1f=fi!6Z&?N%sySopA6X!UBqPgaVD5X9_QUtxPnoSh?mfnsMie0vwH-r4bmxPq*W|+mbgM%oC`ka z{YkyHBR=2+^u3+WHZGUZEXMPa9NDAIaPe`j>-8vK^ax?T`?ku>>Fe<(8z12D`{UUV zZ@IA6&~HjZAU?2!c4qHteuNiK?;_S9P=a=9j;!09GurC5S6wf2VnRQowy5|tff-4z zorb9!aGTzL`aNUEMxkw1I8}=v#nu#lV?irY8oM%5Q95gixttHIKP_Z%^E8@23Zli~ zRWplvTEOo#e|Cbw4Lc(@RyZzW@aTHD^~&FPt7j`*g=9yzs`Zg}ZqWlV9}6E(7h4$) z?XH9xBpu@_O5~2Os>oulqT%yF7|nWc`cG+JN+EG27q^fI+ z(i~n_#8(f2Crx0|0vs1VT_!kA1r#a4DA?gK(HJhBM&i);K*%LFT z4txF(MizRHtgso_nN`a9kMrtI2IiW!oX18UO<5vw`TkqM@m#du+`Qk{odx-T=A!S0 z+%bHcJ65~Bsu+{;DBlnR4H-LEtdcWWY;g}f@K!@ng2XP+D-{Yv7`?9~-#wbAoJy5u zeUa=XuPtF#hd)8bsZ#9{FB6G4IxU=z75G}N}{=Rbggz3?4X4wRgDx#yW~MXKd( z+t@Awna)II<*g>Y(qH>?@mok?n67p--6W^!y+VVHDXlKNEnuhXDA}1Ysvw3b?G*m~ zFIAH|GUAjDueg^23U#K-Zyyr;CrLxP7d*jbcPHgNnVwlUHtx`|dV&;&s$mL5?ZH5YkM`+o((uGiJop`Tx!4!2%!$^|yr$#GNPu4Ufd#6;gz z({_CDyNEx!-QC`s{RiRm`Szmgd*E3w*L9}qhz*YvpPqM5%n0G;@cc9IgZ;6J>)DEy z>yx?5)4lqYpzMZote5z2N(=r&;E%2BKPO=L-_^=MsH#6olRogOiWa9?1}l5@jM#}@i^=U+a;sGjj>IVH z|EJ1eI4PSu7E21HJs4lS^#pd;I0Ifi7>1jRV0VK5rh?OsD!e+yMn{9e5}Z=Y!I95n7+&xWvzTD^o_p{O@|f3X<>9>_ zG`9Cp+K%Vrvi>B%^5(-3UNqCw?5wngn)e<=R(S)%{Z@gj7$5DQR=m?mZktn zz`Iz(G;z**Z#NA%ND2crk_)QT*14}#ls~*i+@gw0pt6*G4Y@WI68XZp*YtTEkET(R zkzgEY?3GDgIucjKO zcZ)*@_1;1$8K+sdcC6_MU$}0h>Ce+i8)uB^Y>5o%Gd~1yH%^LZIH9R8G{v=iMnvraD?% zAtjz`ZNJBdhg2(wTt*TlU*(O~DYwe*IyNqzq(P+$(>C^nX%>X$&&Wx`;4@bxxF>mT zhG@Wv_!9uPm{xYE5iaL6b=C4@n(TBk=ssBsyuR^H_inhs{w?IEZeuI0a%bqVp-p zv;`J_!NEii`JZ}m2yfYo_?BEh1_C7hab95`{>SCpC4F0!^8C_@`#l zGI2LPg);a~yGI0^`9Tru!p<+vc#egSh2OjaJ;$|2o)=ZLf_~+o`Mg0&cq=0Qf`(Ap z{L=ER9gFW)scwP^4ve|!k+>dO1|Bhaf~J^WKeEn3amhc>14@3tsHy6vR| 
z9ttPAq6}n*$|B73Bh8AA6U)cnJQ2-9jPhk5Y2VGdawkuig{9Js;QTOpjp!KLumjTQ zYO#GsC&WjQmbw<22oTpo6W81p?O{6OhVvxh?_*y3texfd&Htbqblp2%+8@4GrYR{S zYsb;I6>l3n>$x%cUi>3cNTG`tOwX#Q@SXO~pF}SIEk&2M>pEH%Db7JjIcf0hhx=ah zs8;<4kndqTn`7?z_y~RF5-qFyVq(6~t9!UPq)KYKZ4+kF#)KKHRIprwa}I-Qvs zO*A%!#lnJnsdN29CcCa5V<3*QH}posx=8ZQrFn?|`pqnCnZ|uMr%M=L=i z!UQ3Po0(F#NulI8)#av<_Atv{VrL?r{#mL|o`*Tyu)L-?U_s-IM8wl?7P0GM`lv@W z@OElvM9aUJz0oNy&QcC=wo=tY2btO?X`eahC}GuV;b9SY)0a>)X@;kgz1(A77&G1N zr)#dIkWHtsG2|F?uL|@-!5W7)eWPuD2+Dhdi};RQ+T%P`RVq zDLe2_;NsO*&u#v3`{FHbo0Y9n7?$KY-fugb0x^EShw)R!*?(fH+3qnd3o*>g?H5!p zsjGZDZEsE8+er=&r0RT_JawI>T+@)X$xa>(*?YXr_<2<$-i?_{!1tO{FKk&@$SR3_ zrbTBe05jzNM!;X*wiKywN(KQAeuG+Nlq@}55v007m|m?0pT`7XT&9>&du`W61g{`~ z#}6TjMR{2wLjo^6B?hnZccUA!lK;Y3IVYm^mWtOIb_dY1GcI zG@rX=?prQXY2+<_5bA4o-;={k+57_A_wb$Bh_U-k-_EN%Fp<1X$Uo%PRPvr_%+ySm zO{5qV%O6rxwnN`lEc?pAfgkv1lbqe1=p#~I6({zn@R$ghr=&C`J7fTrEW~)drq2->IO5$ z)-`q`BTy{zvnFFY$`XEqVFIM`{5us@+glhP*m&#HclQu~DE}a~V+c6tvyMJ$U;3h1 zoNV>wJvr-uOT(Q&`IG#`44Nf_3pw{edxZqN%=(Y)2J9!ACnBlMgN--#t6)E<9E7Ji+?t zVxmE0oA+sO+hyR+Lx6YA__vFlB$ZlZ8n2*3A81PkQCj}FD_r&6-tTun1@*BgC&%Yq zN+U=fFV&DOm`x=VY%m*EJ*mq3yM!svA?sQ1N6ZJ{fw2Z{+22?uCv?n?b4FO*gb zzs-vrPstc!ZASA&_}dO(xh$G0)D4tFnRsC!t$29QOBI1u|akS8BZi&f-vB{7s z--0+9Xv3NbY=HZD>rg&=C?$FcS3LN>8z(*3tY#1dzVPt+SU_5G|I=3r&Fcj7Ws%f{ z^lc*b6;1f~gtY8oakP@x@re-qZMOVnjqqLWIjv0ggRU=s1ZV8>M`{#kBc3_d7Fd4S z7f!M_4tb=xgqjub(_>2a36Kr>u7g18O}W28lAIn#HdfH{cS!b$qm;ELz|1phBgHTv zNd%N=DOO-ZJ{x|4r|Fr3*?M@25K}jpcwWSLkuL&R5%{qAs;6-E-K8Lz+NcwZRzMl# zIm<+5Xp)KR>o8iz->I3mOoiMp((PAJKprP39sSWL60cncF9=uEp=fJnWQDKFer~Cn zd>mhGj_>ZPL06T&;T5Drek}Q3tm=TZCTxS>@`uz3(XO|~c{;go^+q9(_{ez1a56ybwU%z29u4C z!i0wGb}U4|XEJ<^RaQR!bFv{Kdt1qswfa7aOSNZumCZ8&UPNO=U(_I;%KX0-?3Xyd zKxPLwscd|H;AX}RMrTN6Msa)HP*)<9?;j88l1_qN{s?bVnwx8hxyh!*4U*6I0UE3c zT=rf@faEcsQRRe~(E4tiHPF6QtB>6Wn7S9;x6#Z(*6P^)ZY;!nYnn zl`e?-GjQm_y^1Z{RBgNL$Q9t9+>Yq{N{lLY<^K%UGINK$+LRwZ@H| zrmtl)Rme7g8|X=%`dU5=_;S4K=lr9F-=X8A@Nr$uiy>h^&W{`s!qTbAv>~W5LihQO zi}3gE;_;q)R5F_senH23p`){?&$~TYt*&vmIq85 
z7nb&CdsSIJ`!#NE;B^0q`r4K~cswHD&;#1I+U-Pw;-4FJY1Kwy?2G()=&VESO1zjV zpe3o9Q9CLyaxyAdQf4f(fuvo%?@ynW`n<8?$(^iC$MtfO0kv>O>R5QYu=vtY;ht7- z7nscG5kT$7#1P^n!k{UC_Zv{%8d_|pE)o89(W^q>^P+enVu#;9pB`GH6}4OOS+-U9 zs3ymT+o*<=@l}$_bjcE{F>n$ohNZXp$ zcS@L5_PmAqI8r-+G~-M@_Qh&X;G;Hv{#Neugief|t5HFT;kk8L6=jopP{0r=^sa5srzPzD(a=8p_|(og3CuLc#A7 z>)y(d18fZQ7PKrpi%@#8fvjuQhPkL{E%*B7g5`AkW!=$Qpvt>p& z{}FObgkx^diq8-n@WLlr(Ua~~_Lptt3T*-lPNZ0|L=ODkO!-K3s}Sl1l|9RniIp=` zj?c%g0mYdQAfNKFC>0sPf{OIOhGk(@LnWcG6LEGx5F_GsuCD@@)?C$gROFe?Kb$r$ zNs@zO$DFu+y|~xmg9gURo(d=o1D&9(R!fCQ9b<#0WCpd`Z=%*0cpC#Exq45dT&cA; z3&jO4%BB4+5YQkeZ)Y9qU1azx*Os&ji8iDnG) z^i2r4b*}78i$lbo%!$q&jZkf}(hVUtluGditC*G{CR#;xjolhK(1l1m!ii=DJkPWv zRHOZYP>{i7)O{{$8{jxFXf&v(H-KvEzf~P7$|^7E-DxXb1-cbwE<8;#%d4>&aVwk} zmp0hvh*ZFY_>bvJJQp7co9LR%{1sug6>ZzzK3}Ji#bQZ{jg4J8JUw(a}WX0Oog-jL@Pld`+Pln?gd&DQI0I> zVZ}4OIzA0V5-G2Q+dDg}C`8lZ{E4Xuz(C|;=n?@#(RTx=$A$h%{3`uJok<}YrkLtJ zY(m*LY`)jukh%pnaxC5Rm%Y6Auh`2XP|(ePl__?4K%1wPgMl5cX!Ja_Lf%cEs^*R zex(8Jz@;6EQ{#QwMeh3-X07)9l!`d?Vwed{A(w^KDDXaQE{)CThQ#f8+XbYpngPJg zBYxUAe7$CR1E$3p!-}yf1_uXcUI0?Kjgn(yZ2z-L)BhF0=f4gIaDVyBUPE;d(M+4E zknfSSw2iJV#O+$xw;xF-5-0g8qku>fqsSMbK4~CkI8cDCWpp7BI*Do|`&cb!P{?29 zT8~D)rR}!}{DfC*CXQ1Bxy7)ZP>;;8XAQwb$p$rY%RjZKDGk=51e+}EN+#)!w(fE> zVmBV#nEo||4GZ6=lFx%OWj7y~eJqsb@+o9L&`KW9P4U}jcS_1y=cKHw+44YSV7JB> zKXt0c3|8~bKkazhrK%5jcqwFOgf`XDdq~xns++FcvoR;X)RQ9{)NUZ|$klYTaXitX zsoI^`ZJkhH$bi_$3fCV&cmo9t@+3{!xi#u4aca5)F4Xm2GPLkE(A4eMWC>HO0IOdyd!R)1#nl ziuVXknSkN;DvB>FxJ@8hOq4^NteOyW{#Sg7?>N~KKe)ntf-S=wQJT0`D-0p}R&4Ld z3N0g3^!UwXnI3+9#hm}xR#cd+faL4oS8&SHZ4$#dt2Yc#?w*dy8Y7$a!}qhoRHUf{ zjP-?r+0#A^<@Qv+7nSPJ&q`OuU|qaCDG6!0SJDa9q?_T-OrPUo6^Hnd{>m?WP_5%1 z>O5UCjG%MMz(x&!xQ0EDSl3g;0j!}fFtn+kDL_SA4uh?_pYEPu(Ea1a#=$e^&yw{x z`Q+c!8khok()Pk?*B<@q?I_jJ-q*GhHI}!j)CZJ?95-|O8Dg{c_1O`yqX%(BOs3o5!JkUP`pGZ@?Z$(`Vnv%ZLi=S$o{ zu`fLCRvII8AhWT0Wg2adw-X2!GM_}wehV8d2h-#!m}~ygsPQt3r{h^)b?5b%g@I=c z{Bw%t@rx7Une#PRHr`FGO&_B8Yx^nlU(JBmfA)#^wI}?oRE+f_8!R2AY);aQQ1*x& z-z&D&?zaY_-q)=c174<{G7ApI>IaHHo7>EDWLelae^a 
z{azqex97-#`*7ch&dab;xF^Z9r662esV=j^uq=(KcWC;PJ3->01 zW-KJGUk~^fvNQ68Mmyzg!;Lu~aVl+EzEOiwIfOnwZOxUq*u+U)UzNBt$Fcb}Kc7K+$0j4x83Su%idevpA8~}uKO5rm zcyWKqdNm&_#vdWZKn%J98wr9!{wA<}(VnpU+_*@D!)@@_0OU`Qx^ZB(3op)7cX8M5 z4Do>su}p)L-Pl6bodL2e&o1_5eIWp8)Hd(o{~!kVAI!}DUq!qW8Hw8?372qyS)3X$ sx9A_C%7<7C?~{f6i*#_+1ch6AlLIN1Imi4ltOW+h%c#67mof|aFV?n;R{#J2 literal 0 HcmV?d00001 diff --git a/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/sample.json b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/sample.json new file mode 100755 index 0000000000..5f703d1e6c --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/sample.json @@ -0,0 +1,35 @@ +{ + "guid": "1457B49A-2CD3-48E5-B3A9-753EAD2D18F7", + "name": "Separating Host and Device Code Compilation", + "categories": ["Toolkit/Intel® oneAPI Base Toolkit/FPGA/Tutorials"], + "description": "FPGA tutorial demonstrating how to separate the compilation of host and device code to save development time.", + "toolchain": ["dpcpp"], + "os": ["linux", "windows"], + "targetDevice": ["FPGA"], + "builder": ["ide", "cmake"], + "languages": [{"cpp":{}}], + "ciTests": { + "linux": [ + { + "id": "fpga_emu", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make fpga_emu", + "./fast_recompile.fpga_emu" + ] + } + ], + "windows": [ + { + "id": "fpga_emu", + "steps": [ + "cd src", + "ninja fpga_emu", + "fast_recompile.fpga_emu.exe" + ] + } + ] + } +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/src/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/src/CMakeLists.txt new file mode 100755 index 0000000000..1bf5ca6de7 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/src/CMakeLists.txt @@ -0,0 +1,119 @@ +set(DEVICE_SOURCE_FILE kernel.cpp) +set(KERNEL_HEADER_FILE kernel.hpp) +set(HOST_SOURCE_FILE main.cpp) +set(TARGET_NAME fast_recompile) + +set(EMULATOR_TARGET 
${TARGET_NAME}.fpga_emu) +set(FPGA_TARGET ${TARGET_NAME}.fpga) + +String(STRIP "${CMAKE_EXE_LINKER_FLAGS}" CMAKE_EXE_LINKER_FLAGS) + +# Intel supported FPGA Boards and their names +set(A10_PAC_BOARD_NAME "intel_a10gx_pac:pac_a10") +set(S10_PAC_BOARD_NAME "intel_s10sx_pac:pac_s10") + +# Assume target is the Intel(R) PAC with Intel Arria(R) 10 GX FPGA +SET(_FPGA_BOARD ${A10_PAC_BOARD_NAME}) + +# Check if target is the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA +IF (NOT DEFINED FPGA_BOARD) + MESSAGE(STATUS "\tFPGA_BOARD was not specified. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. Please refer to the README for more information on how to run the design on the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${A10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${S10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Stratix(R) 10 SX FPGA.") + SET(_FPGA_BOARD ${S10_PAC_BOARD_NAME}) + +ELSE() + MESSAGE(STATUS "\tAn invalid board name was passed in using the FPGA_BOARD flag. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. 
Please refer to the README for the list of valid board names.") +ENDIF() + +set(HARDWARE_COMPILE_FLAGS -fintelfpga -c) + +# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation +set(HARDWARE_LINK_FLAGS "-fintelfpga -Xshardware -Xsboard=${_FPGA_BOARD} ${USER_HARDWARE_FLAGS}") + +set(EMULATOR_COMPILE_FLAGS -fintelfpga -DFPGA_EMULATOR -c) +set(EMULATOR_LINK_FLAGS -fintelfpga) + +# fpga emulator +if(WIN32) + add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) + set(HOST_EMU_OBJ "host_emu.o") + set(DEVICE_EMU_OBJ "dev_emu.o") + set(DEVICE_IMAGE_EMU_OBJ "dev_image_emu.a") + + add_custom_command(OUTPUT ${HOST_EMU_OBJ} + COMMAND dpcpp ${EMULATOR_COMPILE_FLAGS} + ${CMAKE_CURRENT_SOURCE_DIR}/${HOST_SOURCE_FILE} -o ${HOST_EMU_OBJ} + DEPENDS ${HOST_SOURCE_FILE} ${KERNEL_HEADER_FILE}) + + add_custom_command(OUTPUT ${DEVICE_EMU_OBJ} + COMMAND dpcpp ${EMULATOR_COMPILE_FLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/${DEVICE_SOURCE_FILE} -o ${DEVICE_EMU_OBJ} + DEPENDS ${DEVICE_SOURCE_FILE} ${KERNEL_HEADER_FILE}) + + add_custom_command(OUTPUT ${DEVICE_IMAGE_EMU_OBJ} + COMMAND dpcpp ${EMULATOR_LINK_FLAGS} -fsycl-link=image ${DEVICE_EMU_OBJ} -o ${DEVICE_IMAGE_EMU_OBJ} + DEPENDS ${DEVICE_EMU_OBJ}) + + add_custom_command(OUTPUT ${EMULATOR_TARGET} + COMMAND dpcpp -fintelfpga ${HOST_EMU_OBJ} ${DEVICE_IMAGE_EMU_OBJ} -o ${CMAKE_BINARY_DIR}/${EMULATOR_TARGET} + DEPENDS ${HOST_EMU_OBJ} ${DEVICE_IMAGE_EMU_OBJ}) +else() + add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) + set(HOST_EMU_OBJ "host_emu.o") + set(DEVICE_EMU_OBJ "dev_emu.o") + set(DEVICE_IMAGE_EMU_OBJ "dev_image_emu.a") + + add_custom_command(OUTPUT ${HOST_EMU_OBJ} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${EMULATOR_COMPILE_FLAGS} + ${CMAKE_CURRENT_SOURCE_DIR}/${HOST_SOURCE_FILE} -o ${HOST_EMU_OBJ} + DEPENDS ${HOST_SOURCE_FILE} ${KERNEL_HEADER_FILE}) + + add_custom_command(OUTPUT ${DEVICE_EMU_OBJ} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${EMULATOR_COMPILE_FLAGS} 
${CMAKE_CURRENT_SOURCE_DIR}/${DEVICE_SOURCE_FILE} -o ${DEVICE_EMU_OBJ} + DEPENDS ${DEVICE_SOURCE_FILE} ${KERNEL_HEADER_FILE}) + + add_custom_command(OUTPUT ${DEVICE_IMAGE_EMU_OBJ} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${EMULATOR_LINK_FLAGS} -fsycl-link=image ${DEVICE_EMU_OBJ} -o ${DEVICE_IMAGE_EMU_OBJ} + DEPENDS ${DEVICE_EMU_OBJ}) + + add_custom_command(OUTPUT ${EMULATOR_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} -fintelfpga ${HOST_EMU_OBJ} ${DEVICE_IMAGE_EMU_OBJ} -o ${CMAKE_BINARY_DIR}/${EMULATOR_TARGET} + DEPENDS ${HOST_EMU_OBJ} ${DEVICE_IMAGE_EMU_OBJ}) +endif() + +# fpga +if(WIN32) + add_custom_target(fpga + COMMAND echo "FPGA hardware flow is not supported in Windows") +else() + add_custom_target(fpga DEPENDS ${FPGA_TARGET}) + set(HOST_OBJ "host.o") + set(DEVICE_OBJ "dev.o") + set(DEVICE_IMAGE_OBJ "dev_image.a") + + add_custom_command(OUTPUT ${HOST_OBJ} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${HARDWARE_COMPILE_FLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/${HOST_SOURCE_FILE} -o ${HOST_OBJ} + DEPENDS ${HOST_SOURCE_FILE} ${KERNEL_HEADER_FILE}) + + add_custom_command(OUTPUT ${DEVICE_OBJ} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${HARDWARE_COMPILE_FLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/${DEVICE_SOURCE_FILE} -o ${DEVICE_OBJ} + DEPENDS ${DEVICE_SOURCE_FILE} ${KERNEL_HEADER_FILE}) + + separate_arguments(HARDWARE_LINK_FLAGS_LIST UNIX_COMMAND "${HARDWARE_LINK_FLAGS}") + add_custom_command(OUTPUT ${DEVICE_IMAGE_OBJ} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link=image ${DEVICE_OBJ} -o ${DEVICE_IMAGE_OBJ} + DEPENDS ${DEVICE_OBJ}) + + add_custom_command(OUTPUT ${FPGA_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${CMAKE_EXE_LINKER_FLAGS} -fintelfpga ${HOST_OBJ} ${DEVICE_IMAGE_OBJ} -o ${CMAKE_BINARY_DIR}/${FPGA_TARGET} + DEPENDS ${HOST_OBJ} ${DEVICE_IMAGE_OBJ}) +endif() + +# run +add_custom_target(run + COMMAND ../${TARGET_NAME}.fpga_emu + DEPENDS ${TARGET_NAME}.fpga_emu) diff --git 
a/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/src/build.ninja b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/src/build.ninja new file mode 100755 index 0000000000..ef5b645c71 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/src/build.ninja @@ -0,0 +1,32 @@ +device_source_file = kernel.cpp +host_source_file = main.cpp +target_name = fast_recompile + +emulator_target = ${target_name}.fpga_emu.exe + +hardware_flags = -fintelfpga -Xshardware +emulator_flags = -fintelfpga -DFPGA_EMULATOR + +rule parse_emu + command = dpcpp -c /EHcs ${emulator_flags} ${in} /Fo${out} + +rule gen_image_obj + command = dpcpp -fintelfpga -fsycl-link=image ${in} -o ${out} + +rule link + command = dpcpp -fintelfpga ${in} -o ${out} + +# FPGA emulator +build fpga_emu: phony ${emulator_target} + +host_emu_obj = host_emu.obj +dev_emu_obj = dev_emu.obj +dev_image_emu_obj = dev_image_emu.a + +build ${host_emu_obj}: parse_emu ${host_source_file} + +build ${dev_emu_obj}: parse_emu ${device_source_file} + +build ${dev_image_emu_obj}: gen_image_obj ${dev_emu_obj} + +build ${emulator_target}: link ${host_emu_obj} ${dev_image_emu_obj} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/src/kernel.cpp b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/src/kernel.cpp new file mode 100755 index 0000000000..680da15c67 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/src/kernel.cpp @@ -0,0 +1,70 @@ +//============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +#include +#include "dpc_common.hpp" + +#include "kernel.hpp" + +// Forward declaration of the kernel name +// (This will become unnecessary in a future compiler version.) 
+class VectorAdd; + +void RunKernel(std::vector &vec_a, std::vector &vec_b, + std::vector &vec_r) { + + // Select either the FPGA emulator or FPGA device +#if defined(FPGA_EMULATOR) + intel::fpga_emulator_selector device_selector; +#else + intel::fpga_selector device_selector; +#endif + + try { + + // Create a queue bound to the chosen device. + // If the device is unavailable, a SYCL runtime exception is thrown. + queue q(device_selector, dpc_common::exception_handler); + + // Print out the device information. + std::cout << "Running on device: " + << q.get_device().get_info() << "\n"; + + // Device buffers + buffer device_a(vec_a); + buffer device_b(vec_b); + // Use verbose SYCL 1.2 syntax for the output buffer. + // (This will become unnecessary in a future compiler version.) + buffer device_r(vec_r.data(), kArraySize); + + q.submit([&](handler &h) { + // Data accessors + auto a = device_a.get_access(h); + auto b = device_b.get_access(h); + auto r = device_r.get_access(h); + + // Kernel executes with pipeline parallelism on the FPGA. + // Use kernel_args_restrict to specify that a, b, and r do not alias. + h.single_task([=]() [[intel::kernel_args_restrict]] { + for (size_t i = 0; i < kArraySize; ++i) { + r[i] = a[i] + b[i]; + } + }); + }); + + } catch (sycl::exception const &e) { + // Catches exceptions in the host code + std::cout << "Caught a SYCL host exception:\n" << e.what() << "\n"; + + // Most likely the runtime couldn't find FPGA hardware! 
+ if (e.get_cl_code() == CL_DEVICE_NOT_FOUND) { + std::cout << "If you are targeting an FPGA, please ensure that your " + "system has a correctly configured FPGA board.\n"; + std::cout << "If you are targeting the FPGA emulator, compile with " + "-DFPGA_EMULATOR.\n"; + } + std::terminate(); + } +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/src/kernel.hpp b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/src/kernel.hpp new file mode 100755 index 0000000000..b36fdb9be1 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/src/kernel.hpp @@ -0,0 +1,16 @@ +//============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +#include +using namespace sycl; + +// tolerance used in floating point comparisons +constexpr float kTol = 0.001; + +// array size of vectors a, b and c +constexpr size_t kArraySize = 32; + +void RunKernel(std::vector &vec_a, std::vector &vec_b, + std::vector &vec_r); diff --git a/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/src/main.cpp b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/src/main.cpp new file mode 100755 index 0000000000..2d001961d1 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fast_recompile/src/main.cpp @@ -0,0 +1,48 @@ +//============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= + +#include +#include +#include +#include + +#include "kernel.hpp" + +using namespace sycl; + +int main() { + std::vector vec_a(kArraySize); + std::vector vec_b(kArraySize); + std::vector vec_r(kArraySize); + + // Fill vectors a and b with random float values + for (size_t i = 0; i < kArraySize; i++) { + vec_a[i] = rand() / 
(float)RAND_MAX; + vec_b[i] = rand() / (float)RAND_MAX; + } + + // The definition of this function is in a different compilation unit, + // so host and device code can be separately compiled. + RunKernel(vec_a, vec_b, vec_r); + + // Test the results + size_t correct = 0; + for (size_t i = 0; i < kArraySize; i++) { + float tmp = vec_a[i] + vec_b[i] - vec_r[i]; + if (tmp * tmp < kTol * kTol) { + correct++; + } + } + + // Summarize results + if (correct == kArraySize) { + std::cout << "PASSED: results are correct\n"; + } else { + std::cout << "FAILED: results are incorrect\n"; + } + + return !(correct == kArraySize); +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/CMakeLists.txt new file mode 100755 index 0000000000..0ac5b4f877 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/CMakeLists.txt @@ -0,0 +1,13 @@ +set(CMAKE_CXX_COMPILER "dpcpp") + + +cmake_minimum_required (VERSION 2.8) + +project(CompileFlow) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +add_subdirectory (src) + diff --git a/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/License.txt b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/License.txt new file mode 100755 index 0000000000..e63c6e13dc --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/License.txt @@ -0,0 +1,7 @@ +Copyright Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom 
the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/README.md b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/README.md new file mode 100755 index 0000000000..2ddfd32e7a --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/README.md @@ -0,0 +1,193 @@ +# Compiling DPC++ for FPGA +This FPGA tutorial introduces how to compile DPC++ for FPGA through a simple vector addition example. + +***Documentation***: The [oneAPI DPC++ FPGA Optimization Guide](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide) provides comprehensive instructions for targeting FPGAs through DPC++. The [oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide) is a general resource for target-independent DPC++ programming. + +| Optimized for | Description +--- |--- +| OS | Linux* Ubuntu* 18.04; Windows* 10 +| Hardware | Intel® Programmable Acceleration Card (PAC) with Intel Arria® 10 GX FPGA;
Intel® Programmable Acceleration Card (PAC) with Intel Stratix® 10 SX FPGA +| Software | Intel® oneAPI DPC++ Compiler (Beta)
Intel® FPGA Add-On for oneAPI Base Toolkit +| What you will learn | How and why compiling DPC++ to FPGA differs from CPU or GPU
FPGA device image types and when to use them
The compile flags used to target FPGA +| Time to complete | 15 minutes + +_Notice: Limited support in Windows*; compiling for FPGA hardware is not supported in Windows*_ + +## Purpose +Field-programmable gate arrays (FPGAs) are configurable integrated circuits that can be programmed to implement arbitrary circuit topologies. Classified as *spatial* compute architectures, FPGAs differ significantly from fixed Instruction Set Architecture (ISA) devices like CPUs and GPUs, and offer a different set of optimization trade-offs from these traditional accelerator devices. + +While DPC++ can be compiled for CPU, GPU or for FPGA, the process for compiling to FPGA is somewhat different than for CPU or GPU. This tutorial motivates these differences and explains how to compile a "Hello World"-style vector addition kernel for FPGA. + +### Why is FPGA compilation different? +FPGAs differ from CPUs and GPUs in many interesting ways. However, in the scope of this tutorial, there is only one difference that matters: compared to CPU or GPU, generating a device image for FPGA hardware is a computationally intensive and time-consuming process. It is normal for an FPGA compile to take several hours to complete. + +For this reason, only ahead-of-time (or "offline") kernel compilation mode is supported for FPGA. The long compile time for FPGA hardware makes just-in-time (or "online") compilation impractical. + +Long compile times are detrimental to developer productivity. The Intel® oneAPI DPC++ Compiler provides several mechanisms that enable DPC++ developers targeting FPGA to iterate quickly on their designs. By circumventing the time-consuming process of full FPGA compilation wherever possible, DPC++ FPGA developers can enjoy the fast compile times familiar to CPU and GPU developers. + + +### Three types of DPC++ FPGA compilation +The three types of FPGA compilation are summarized in the table below. 
+ +| Device Image Type | Time to Compile | Description +--- |--- |--- +| FPGA Emulator | seconds | The FPGA device code is compiled to the CPU.
This is used to verify the code's functional correctness. +| Optimization Report | minutes | The FPGA device code is partially compiled for hardware.
The compiler generates an optimization report that describes the structures generated on the FPGA, identifies performance bottlenecks, and estimates resource utilization. +| FPGA Hardware | hours | Generates the real FPGA bitstream to execute on the target FPGA platform + +The typical FPGA DPC++ development workflow is to iterate in each of these stages, refining the code using the feedback provided by that stage. Intel® recommends relying on emulation and the optimization report whenever possible. + +Compiling for FPGA emulation or to generate the FPGA optimization report requires only the Intel® oneAPI DPC++ Compiler (part of the Intel® oneAPI Base Toolkit). An FPGA hardware compile requires the Intel® FPGA Add-On for oneAPI Base Toolkit. + + +#### FPGA Emulator + +The FPGA emulator is the fastest method to verify the correctness of your code. The FPGA emulator executes DPC++ device code on the CPU. The emulator is similar to the SYCL* host device, but unlike the host device the FPGA emulator device supports FPGA extensions such as FPGA pipes and `fpga_reg`. + +There are two important caveats to remember when using the FPGA emulator. +* **Performance is not representative.** It is not meaningful to evaluate performance on the FPGA emulator, as it is not representative of the behavior of the FPGA device. For example, an optimization that yields a 100x performance improvement on the FPGA may show no impact on the emulator performance, or it may show an unrelated increase or decrease. +* **Undefined behavior may differ.** If your code produces different results when compiled for the FPGA emulator versus FPGA hardware, it is likely that your code is exercising undefined behavior. By definition, undefined behavior is not specified by the language specification, and may manifest differently on different targets. + +#### Optimization Report +A full FPGA compilation occurs in two stages: +1. 
**FPGA early image:** The DPC++ device code is optimized and converted into an FPGA design specified in Verilog RTL (a low-level, native entry language for FPGAs). This intermediate compilation result is the FPGA early device image, which is *not* executable. This FPGA early image compilation process takes minutes. +2. **FPGA hardware image:** The Verilog RTL specifying the design's circuit topology is mapped onto the FPGA's sea of primitive hardware resources by the Intel® Quartus® Prime software. Intel® Quartus® Prime is included in the Intel® FPGA Add-On, which is required for this compilation stage. The result is an FPGA hardware binary (also referred to as a bitstream). This compilation process takes hours. + +Optimization reports are generated after both stages. The optimization report generated after the FPGA early device image, sometimes called the "static report", contains significant information about how the compiler has transformed your DPC++ device code into an FPGA design. The report contains visualizations of structures generated on the FPGA, performance and expected performance bottleneck information, and estimated resource utilization. + +The [oneAPI DPC++ FPGA Optimization Guide](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide/top/analyze-your-design.html) contains a chapter on how to analyze the reports generated after the FPGA early image and FPGA image. + +#### FPGA Hardware +This is a full compile through to the FPGA hardware image. You can target the Intel® PAC with Intel Arria® 10 GX FPGA, the Intel® Programmable Acceleration Card (PAC) with Intel Stratix® 10 SX FPGA, or a custom board. + +### Device Selectors +The following code snippet demonstrates how you can specify the target device in your source code. The selector is used to specify the target device at runtime. 
+ +```c++ +// FPGA device selectors are defined in this utility header +#include + +int main() { + // Select either: + // - the FPGA emulator device (CPU emulation of the FPGA) + // - the FPGA device (a real FPGA) +#if defined(FPGA_EMULATOR) + intel::fpga_emulator_selector device_selector; +#else + intel::fpga_selector device_selector; +#endif + + queue q(device_selector); + ... +} +``` +Notice that the FPGA emulator and the FPGA are different target devices. It is recommended to use a preprocessor define to choose between the emulator and FPGA selectors. This makes it easy to switch between targets using only command-line flags. Since the FPGA only supports ahead-of-time compilation, dynamic selectors (such as the default_selector) are less useful than explicit selectors when targeting FPGA. + + +### Compiler Flags +Here is a cheat sheet of the DPC++ compiler commands to compile for the FPGA emulator, to generate the FPGA early image optimization reports, and to compile for FPGA hardware. +``` +# FPGA emulator +dpcpp -fintelfpga -DFPGA_EMULATOR fpga_compile.cpp -o fpga_compile.fpga_emu + +# Optimization report (default board) +dpcpp -fintelfpga -Xshardware -fsycl-link fpga_compile.cpp -o fpga_compile_report.a +# Optimization report (explicit board) +dpcpp -fintelfpga -Xshardware -fsycl-link -Xsboard=intel_s10sx_pac:pac_s10 fpga_compile.cpp -o fpga_compile_report.a + +# FPGA hardware (default board) +dpcpp -fintelfpga -Xshardware fpga_compile.cpp -o fpga_compile.fpga +# FPGA hardware (explicit board) +dpcpp -fintelfpga -Xshardware -Xsboard=intel_s10sx_pac:pac_s10 fpga_compile.cpp -o fpga_compile.fpga +``` + +The compiler flags used to achieve this are explained below. +| Flag | Explanation +--- |--- +| `-fintelfpga` | Perform ahead-of-time compilation for FPGA. +| `-DFPGA_EMULATOR` | Adds a preprocessor define (see code snippet above). +| `-Xshardware` | `-Xs` is used to pass arguments to the FPGA backend.
Since the emulator is the default FPGA target, you must pass `-Xshardware` to instruct the compiler to target FPGA hardware. +| `-Xsboard` | Optional argument to specify the FPGA board target.
If omitted, a default FPGA board is chosen. +| `-fsycl-link` | This is synonymous with `-fsycl-link=early`.
It instructs the compiler to stop after creating the FPGA early image (and associated optimization report). + +Notice that whether you are targeting the FPGA emulator or FPGA hardware must be specified twice: through compiler flags for the ahead-of-time compilation, and through the device selector for the runtime. + + +## Key Concepts +* How and why compiling DPC++ to FPGA differs from CPU or GPU +* FPGA device image types and when to use them +* The compile flags used to target FPGA + +## License +This code sample is licensed under MIT license. + + +## Building the `fpga_compile` Tutorial + +### Include Files +The included header `dpc_common.hpp` is located at `%ONEAPI_ROOT%\dev-utilities\latest\include` on your development system. + +### Running Samples in DevCloud +If running a sample in the Intel DevCloud, remember that you must specify the compute node (fpga_compile or fpga_runtime) as well as whether to run in batch or interactive mode. For more information see the Intel® oneAPI Base Toolkit Get Started Guide ([https://devcloud.intel.com/oneapi/get-started/base-toolkit/](https://devcloud.intel.com/oneapi/get-started/base-toolkit/)). + +When compiling for FPGA hardware, it is recommended to increase the job timeout to 12h. + +### On a Linux* System + +1. Generate the `Makefile` by running `cmake`. + ``` + mkdir build + cd build + ``` + To compile for the Intel® PAC with Intel Arria® 10 GX FPGA, run `cmake` using the command: + ``` + cmake .. + ``` + Alternatively, to compile for the Intel® PAC with Intel Stratix® 10 SX FPGA, run `cmake` using the command: + + ``` + cmake .. -DFPGA_BOARD=intel_s10sx_pac:pac_s10 + ``` + +2. Compile the design through the generated `Makefile`. 
The following build targets are provided, matching the recommended development flow: + + * Compile for [emulation](#fpga-emulator) (fast compile time, targets emulated FPGA device): + ``` + make fpga_emu + ``` + * Generate the [optimization report](#optimization-report): + ``` + make report + ``` + * Compile for [FPGA hardware](#fpga-hardware) (longer compile time, targets FPGA device): + ``` + make fpga + ``` +3. (Optional) As the above hardware compile may take several hours to complete, an Intel® PAC with Intel Arria® 10 GX FPGA precompiled binary can be downloaded here. + + ### In Third-Party Integrated Development Environments (IDEs) + +You can compile and run this tutorial in the Eclipse* IDE (in Linux*). For instructions, refer to the following link: [Intel® oneAPI DPC++ FPGA Workflows on Third-Party IDEs](https://software.intel.com/en-us/articles/intel-oneapi-dpcpp-fpga-workflow-on-ide) + + +## Examining the Reports +Locate `report.html` in the `fpga_compile_report.prj/reports/` or `fpga_compile_s10_pac_report.prj/reports/` directory. Open the report in any of Chrome*, Firefox*, Edge*, or Internet Explorer*. + +Browse the reports that were generated for the `VectorAdd` kernel's FPGA early image. You may also wish to examine the reports generated by the full FPGA hardware compile and compare their contents. + +## Running the Sample + + 1. Run the sample on the FPGA emulator (the kernel executes on the CPU): + ``` + ./fpga_compile.fpga_emu (Linux) + fpga_compile.fpga_emu.exe (Windows) + ``` +2. 
Run the sample on the FPGA device: + ``` + ./fpga_compile.fpga (Linux) + ``` + +### Example of Output +``` +PASSED: results are correct +``` diff --git a/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/fpga_compile.sln b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/fpga_compile.sln new file mode 100755 index 0000000000..248072508d --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/fpga_compile.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.28307.705 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "fpga_compile", "fpga_compile.vcxproj", "{6271F8A8-6391-4040-BE74-71DDBD75CB64}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {6271F8A8-6391-4040-BE74-71DDBD75CB64}.Debug|x64.ActiveCfg = Debug|x64 + {6271F8A8-6391-4040-BE74-71DDBD75CB64}.Debug|x64.Build.0 = Debug|x64 + {6271F8A8-6391-4040-BE74-71DDBD75CB64}.Release|x64.ActiveCfg = Release|x64 + {6271F8A8-6391-4040-BE74-71DDBD75CB64}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {8122B579-CEB9-4397-AD32-FC1D48EE832E} + EndGlobalSection +EndGlobal diff --git a/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/fpga_compile.vcxproj b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/fpga_compile.vcxproj new file mode 100755 index 0000000000..2e4c2fb7aa --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/fpga_compile.vcxproj @@ -0,0 +1,160 @@ + + + + + Debug + x64 + + + Release + x64 + + + + 15.0 + 
{6271f8a8-6391-4040-be74-71ddbd75cb64} + Win32Proj + fpga_compile + $(WindowsSDKVersion.Replace("\","")) + + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + + + + + + + + + + + + + + + + + + + true + + + true + + + false + + + false + + + + Use + Level3 + Disabled + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + Disabled + true + true + pch.h + true + -DFPGA_EMULATOR %(AdditionalOptions) + $(IntDir)fpga_compile.obj + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + true + -DFPGA_EMULATOR %(AdditionalOptions) + $(IntDir)fpga_compile.obj + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + + + + + + + + \ No newline at end of file diff --git a/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/sample.json b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/sample.json new file mode 100755 index 0000000000..9fa4654c33 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/sample.json @@ -0,0 +1,51 @@ +{ + "guid": "A211FDE2-B037-4069-BD84-C45E354798B7", + "name": "Compiling DPC++ for FPGA", + "categories": ["Toolkit/Intel® oneAPI Base Toolkit/FPGA/Tutorials"], + "description": "FPGA tutorial introducing how to compile DPC++ for FPGA.", + "toolchain": ["dpcpp"], + "os": ["linux", "windows"], + "targetDevice": ["FPGA"], + "builder": ["ide", "cmake"], + "languages": [{"cpp":{}}], + "ciTests": { + "linux": [ + { + "id": "fpga_emu", + 
"steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make fpga_emu", + "./fpga_compile.fpga_emu" + ] + }, + { + "id": "report", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make report" + ] + } + ], + "windows": [ + { + "id": "fpga_emu", + "steps": [ + "cd src", + "ninja fpga_emu", + "fpga_compile.fpga_emu.exe" + ] + }, + { + "id": "report", + "steps": [ + "cd src", + "ninja report" + ] + } + ] + } +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/src/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/src/CMakeLists.txt new file mode 100755 index 0000000000..4fa57ebc9c --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/src/CMakeLists.txt @@ -0,0 +1,89 @@ +set(SOURCE_FILE fpga_compile.cpp) +set(TARGET_NAME fpga_compile) + +set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu) +set(FPGA_TARGET ${TARGET_NAME}.fpga) + +# Intel supported FPGA Boards and their names +set(A10_PAC_BOARD_NAME "intel_a10gx_pac:pac_a10") +set(S10_PAC_BOARD_NAME "intel_s10sx_pac:pac_s10") + +# Assume target is the Intel(R) PAC with Intel Arria(R) 10 GX FPGA +SET(_FPGA_BOARD ${A10_PAC_BOARD_NAME}) + +# Check if target is the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA +IF (NOT DEFINED FPGA_BOARD) + MESSAGE(STATUS "\tFPGA_BOARD was not specified. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. 
Please refer to the README for more information on how to run the design on the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${A10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${S10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Stratix(R) 10 SX FPGA.") + SET(_FPGA_BOARD ${S10_PAC_BOARD_NAME}) + +ELSE() + MESSAGE(STATUS "\tAn invalid board name was passed in using the FPGA_BOARD flag. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. Please refer to the README for the list of valid board names.") +ENDIF() + +set(HARDWARE_COMPILE_FLAGS "-fintelfpga") + +# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation +set(HARDWARE_LINK_FLAGS "-fintelfpga -Xshardware -Xsboard=${_FPGA_BOARD} ${USER_HARDWARE_FLAGS}") + +set(EMULATOR_COMPILE_FLAGS "-fintelfpga -DFPGA_EMULATOR ") +set(EMULATOR_LINK_FLAGS "-fintelfpga ") + +# fpga emulator +if(WIN32) + set(WIN_EMULATOR_TARGET ${EMULATOR_TARGET}.exe) + add_custom_target(fpga_emu DEPENDS ${WIN_EMULATOR_TARGET}) + separate_arguments(WIN_EMULATOR_COMPILE_FLAGS WINDOWS_COMMAND "${EMULATOR_COMPILE_FLAGS}") + add_custom_command(OUTPUT ${WIN_EMULATOR_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} ${WIN_EMULATOR_COMPILE_FLAGS} /GX ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${WIN_EMULATOR_TARGET} + DEPENDS ${SOURCE_FILE}) + +else() + add_executable(${EMULATOR_TARGET} ${SOURCE_FILE}) + add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS ${EMULATOR_COMPILE_FLAGS}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS ${EMULATOR_LINK_FLAGS}) +endif() + +# fpga +if(WIN32) + add_custom_target(fpga + 
COMMAND echo "FPGA hardware flow is not supported in Windows") +else() + add_executable(${FPGA_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILE}) + add_custom_target(fpga DEPENDS ${FPGA_TARGET}) + set_target_properties(${FPGA_TARGET} PROPERTIES COMPILE_FLAGS ${HARDWARE_COMPILE_FLAGS}) + set_target_properties(${FPGA_TARGET} PROPERTIES LINK_FLAGS ${HARDWARE_LINK_FLAGS}) +endif() + +# generate report +if(WIN32) + set(DEVICE_OBJ_FILE ${TARGET_NAME}_report.a) + add_custom_target(report DEPENDS ${DEVICE_OBJ_FILE}) + + separate_arguments(HARDWARE_LINK_FLAGS_LIST WINDOWS_COMMAND "${HARDWARE_LINK_FLAGS}") + add_custom_command(OUTPUT ${DEVICE_OBJ_FILE} + COMMAND ${CMAKE_CXX_COMPILER} /EHsc ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${DEVICE_OBJ_FILE} + DEPENDS ${SOURCE_FILE}) + +else() + set(DEVICE_OBJ_FILE ${TARGET_NAME}_report.a) + add_custom_target(report DEPENDS ${DEVICE_OBJ_FILE}) + + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} ${SOURCE_FILE} COPYONLY) + + separate_arguments(HARDWARE_LINK_FLAGS_LIST UNIX_COMMAND "${HARDWARE_LINK_FLAGS}") + add_custom_command(OUTPUT ${DEVICE_OBJ_FILE} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link ${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${DEVICE_OBJ_FILE} + DEPENDS ${SOURCE_FILE}) +endif() + +# run +add_custom_target(run + COMMAND ../${TARGET_NAME}.fpga_emu + DEPENDS ${TARGET_NAME}.fpga_emu) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/src/build.ninja b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/src/build.ninja new file mode 100755 index 0000000000..9dee50b9f6 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/src/build.ninja @@ -0,0 +1,30 @@ +source_file = fpga_compile.cpp +target_name = fpga_compile + +emulator_target = ${target_name}.fpga_emu.exe +report_target = ${target_name}_report.a +report_target_s10_pac = 
${target_name}_s10_pac_report.a + +hardware_flags = -fintelfpga -Xshardware +emulator_flags = -fintelfpga -DFPGA_EMULATOR + +rule build_fpga_emu + command = dpcpp /GX ${emulator_flags} $in -o $out + +rule gen_report + command = dpcpp /GX ${hardware_flags} -Xsboard=intel_a10gx_pac:pac_a10 -fsycl-link $in -o $out + +rule gen_report_s10_pac + command = dpcpp /GX ${hardware_flags} -Xsboard=intel_s10sx_pac:pac_s10 -fsycl-link $in -o $out + +# FPGA emulator +build fpga_emu: phony ${emulator_target} +build ${emulator_target}: build_fpga_emu ${source_file} + +# report +build report: phony ${report_target} +build ${report_target}: gen_report ${source_file} + +# report (S10 PAC) +build report_s10_pac: phony ${report_target_s10_pac} +build ${report_target_s10_pac}: gen_report_s10_pac ${source_file} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/src/fpga_compile.cpp b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/src/fpga_compile.cpp new file mode 100755 index 0000000000..d0e1dcb963 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/GettingStarted/fpga_compile/src/fpga_compile.cpp @@ -0,0 +1,118 @@ +//============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +#include +#include +#include +#include +#include "dpc_common.hpp" + +using namespace sycl; + +// Vector size for this example +constexpr size_t kSize = 1024; + +// Forward declaration of the kernel name +// (This will become unnecessary in a future compiler version.) +class VectorAdd; + + +int main() { + + // Set up three vectors and fill two with random values. 
+ std::vector vec_a(kSize), vec_b(kSize), vec_r(kSize); + for (int i = 0; i < kSize; i++) { + vec_a[i] = rand(); + vec_b[i] = rand(); + } + + // Select either: + // - the FPGA emulator device (CPU emulation of the FPGA) + // - the FPGA device (a real FPGA) +#if defined(FPGA_EMULATOR) + intel::fpga_emulator_selector device_selector; +#else + intel::fpga_selector device_selector; +#endif + + try { + + // Create a queue bound to the chosen device. + // If the device is unavailable, a SYCL runtime exception is thrown. + queue q(device_selector, dpc_common::exception_handler); + + // Print out the device information. + std::cout << "Running on device: " + << q.get_device().get_info() << "\n"; + + { + // Create buffers to share data between host and device. + // The runtime will copy the necessary data to the FPGA device memory + // when the kernel is launched. + buffer buf_a(vec_a); + buffer buf_b(vec_b); + // Use verbose SYCL 1.2 syntax for the output buffer. + // (This will become unnecessary in a future compiler version.) + buffer buf_r(vec_r.data(), kSize); + + + // Submit a command group to the device queue. + q.submit([&](handler& h) { + + // The SYCL runtime uses the accessors to infer data dependencies. + // A "read" accessor must wait for data to be copied to the device + // before the kernel can start. A "write discard" accessor does not. + auto a = buf_a.get_access(h); + auto b = buf_b.get_access(h); + auto r = buf_r.get_access(h); + + // The kernel uses single_task rather than parallel_for. + // The task's for loop is executed in pipeline parallel on the FPGA, + // exploiting the same parallelism as an equivalent parallel_for. + h.single_task([=]() { + for (int i = 0; i < kSize; ++i) { + r[i] = a[i] + b[i]; + } + }); + }); + + // The buffer destructor is invoked when the buffers pass out of scope. + // buf_r's destructor updates the content of vec_r on the host. + } + + // The queue destructor is invoked when q passes out of scope. 
+ // q's destructor invokes q's exception handler on any device exceptions. + } + catch (sycl::exception const& e) { + // Catches exceptions in the host code + std::cout << "Caught a SYCL host exception:\n" << e.what() << "\n"; + + // Most likely the runtime couldn't find FPGA hardware! + if (e.get_cl_code() == CL_DEVICE_NOT_FOUND) { + std::cout << "If you are targeting an FPGA, please ensure that your " + "system has a correctly configured FPGA board.\n"; + std::cout << "If you are targeting the FPGA emulator, compile with " + "-DFPGA_EMULATOR.\n"; + } + std::terminate(); + } + + // Check the results. + int correct = 0; + for (int i = 0; i < kSize; i++) { + if ( vec_r[i] == vec_a[i] + vec_b[i] ) { + correct++; + } + } + + // Summarize and return. + if (correct == kSize) { + std::cout << "PASSED: results are correct\n"; + } else { + std::cout << "FAILED: results are incorrect\n"; + } + + return !(correct == kSize); +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Tools/system_profiling/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/Tools/system_profiling/CMakeLists.txt new file mode 100755 index 0000000000..35161c6113 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Tools/system_profiling/CMakeLists.txt @@ -0,0 +1,11 @@ +set(CMAKE_CXX_COMPILER "dpcpp") + +cmake_minimum_required (VERSION 2.8) + +project(SystemProfiling) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +add_subdirectory (src) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Tools/system_profiling/License.txt b/DirectProgramming/DPC++FPGA/Tutorials/Tools/system_profiling/License.txt new file mode 100755 index 0000000000..e63c6e13dc --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Tools/system_profiling/License.txt @@ -0,0 +1,7 @@ +Copyright Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this 
software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Tools/system_profiling/README.md b/DirectProgramming/DPC++FPGA/Tutorials/Tools/system_profiling/README.md new file mode 100755 index 0000000000..12960b3317 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Tools/system_profiling/README.md @@ -0,0 +1,300 @@ + +# Using the Intercept Layer for OpenCL* Applications to Identify Optimization Opportunities +This FPGA tutorial demonstrates how to use the Intercept Layer for OpenCL* Applications to perform system-level profiling on a design and reveal areas for improvement. + +***Documentation***: The [Intercept Layer for OpenCL* Applications](https://github.com/intel/opencl-intercept-layer) GitHub provides complete documentation for the use of this tool. The [oneAPI DPC++ FPGA Optimization Guide](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide) provides comprehensive instructions for targeting FPGAs through DPC++. 
The [oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide) is a general resource for target-independent DPC++ programming. + + + +| Optimized for | Description +--- |--- +| OS | Linux* Ubuntu* 18.04 +| Hardware | Intel® Programmable Acceleration Card (PAC) with Intel Arria® 10 GX FPGA
Intel® Programmable Acceleration Card (PAC) with Intel Stratix® 10 SX FPGA +| Software | Intel® oneAPI DPC++ Compiler (Beta)
Intel® FPGA Add-On for oneAPI Base Toolkit +| What you will learn | Summary of profiling tools available for performance optimization
About the Intercept Layer for OpenCL* Applications
How to set up and use this tool
A case study of using this tool to identify when the double buffering system-level optimization is beneficial +| Time to complete | 30 minutes + +_Notice: Tutorial is not supported on Windows* as compiling to FPGA hardware is not yet supported in Windows*_ + +## Purpose +This FPGA tutorial demonstrates how to use the Intercept Layer for OpenCL* Applications, an open-source tool, to perform system-level profiling on a design and reveal areas for improvement. + +### Profiling Techniques +The following code snippet uses standard SYCL* and C++ language features to extract profiling information from DPC++ code. + +```c++ +void profiling_example(const std::vector<float>& vec_in, + std::vector<float>& vec_out ) { + + // Start the timer (using std::chrono) + dpc_common::TimeInterval exec_time; + + // Host performs pre-processing of input data + std::vector<float> vec_pp = PreProcess(vec_in); + + // FPGA device performs additional processing + intel::fpga_selector selector; + queue q(selector, dpc_common::exception_handler, + property::queue::enable_profiling{}); + + buffer buf_in(vec_pp); + buffer buf_out(vec_out); + + event e = q.submit([&](handler &h) { + auto acc_in = buf_in.get_access<access::mode::read>(h); + auto acc_out = buf_out.get_access<access::mode::discard_write>(h); + + h.single_task<class Kernel>([=]() [[intel::kernel_args_restrict]] { + DeviceProcessing(acc_in, acc_out); + }); + }); + + // Query event e for kernel profiling information + // (blocks until command groups associated with e complete) + double kernel_time_ns = + e.get_profiling_info<info::event_profiling::command_end>() - + e.get_profiling_info<info::event_profiling::command_start>(); + + // Stop the timer. + double total_time_s = exec_time.Elapsed(); + + // Report profiling info + std::cout << "Kernel compute time: " << kernel_time_ns * 1e-6 << " ms\n"; + std::cout << "Total compute time: " << total_time_s * 1e3 << " ms\n"; +} +``` + +This tutorial introduces the Intercept Layer for OpenCL* Applications, a profiling tool that extracts and visualizes system-level profiling information for DPC++ programs. 
This tool can extract the same profiling data (and more) as the code snippet above, without requiring any code-level profiling directives. + +The Intercept Layer for OpenCL* Applications provides coarse-grained, system-level profiling information. A complementary tool, the Intel® VTune™ Profiler, provides fine-grained profiling information for the kernels executing on the device. Together, these two tools can be used to optimize both host-side and device-side execution. + +### The Intercept Layer for OpenCL* Applications + +The Intercept Layer for OpenCL* Applications is an open-source tool that you can use to profile DPC++ designs at a system level. Although it is not part of the oneAPI Base Toolkit installation, it is freely available on GitHub. + +This tool serves the following purposes: +* Intercept host calls before they reach the device in order to gather performance data and log host calls. +* Provide data to visualize the calls through time, and separate them into *queued*, *submitted*, and *execution* sections for a better understanding of the execution. +* Identify gaps (using visualization) in the runtime that may be leading to inefficient execution and throughput drops. + +The Intercept Layer for OpenCL* Applications has several different options for capturing different aspects of the host run. These options are described in its [documentation](https://github.com/intel/opencl-intercept-layer). This tutorial uses the call-logging and device timeline features that print information about the calls made by the host during execution. + +### Data Visualization + +You can visualize the data generated by the Intercept Layer for OpenCL* Applications in the following ways: +* __Google* Chrome* trace event profiling tool__: JSON files generated by the Intercept Layer for OpenCL* Applications contain device timeline information. 
You can open these JSON files in the [Google* Chrome* trace event profiling tool](chrome://tracing/) to generate a visual representation of the profiling data. +* __Microsoft* Excel*__: The Intercept Layer for OpenCL* Applications contains a Python script that parses the timeline information into a Microsoft* Excel* file, where it is presented both in a table format and in a bar graph. + +This tutorial will use the Google* Chrome* trace event profiling tool for visualization. + +Use the visualized data to identify gaps in the runtime where events are waiting for something else to finish executing. These gaps represent potential opportunities for system-level optimization. While it is not possible to eliminate all such gaps, you might be able to eliminate those caused by dependencies that can be avoided. + +### Tutorial Example: Double Buffering + +This tutorial is based on the *double-buffering* optimization. Double-buffering allows host data processing and host transfers to the device-side buffer to occur in parallel with the kernel execution on the FPGA device. This parallelization is useful when the host performs any combination of the following actions between consecutive kernel runs: +* Preprocessing +* Postprocessing +* Writes to the device buffer + +By running host and device actions in parallel, execution gaps between kernels are removed as they no longer have to wait for the host to finish its operation. You can clearly see the benefits of double-buffering with the visualizations provided by the Intercept Layer output. + +### Setting up the Intercept Layer for OpenCL* Applications +The Intercept Layer for OpenCL* Applications is available on GitHub at the following URL: <https://github.com/intel/opencl-intercept-layer> + +To set up the Intercept Layer for OpenCL* Applications, perform the following steps: + +1) [Download](https://github.com/intel/opencl-intercept-layer) the Intercept Layer for OpenCL* Applications version 2.2.1 or later from GitHub. 
+ + +2) Build the Intercept Layer according to the instructions provided in [How to Build the Intercept Layer for OpenCL* Applications](https://github.com/intel/opencl-intercept-layer/blob/master/docs/build.md). + * __Run `cmake`__: Ensure that you set `ENABLE_CLILOADER=1` when running cmake. + (i.e. `cmake -DENABLE_CLILOADER=1 ..` ) + * __Run `make`__: After the cmake step, `make` must be run in the build directory. This step builds the `cliloader` loader utility. + * __Add to your `PATH`__: The `cliloader` executable should now exist in the `<path to opencl-intercept-layer download>/<build directory>/cliloader/` directory. Add this directory to your `PATH` environment variable if you wish to run multiple designs using `cliloader`. + + You can now pass your executables to `cliloader` to run them with the intercept layer. For details about the `cliloader` loader utility, see [cliloader: A Intercept Layer for OpenCL* Applications Loader](https://github.com/intel/opencl-intercept-layer/blob/master/docs/cliloader.md). + +3) Set `cliloader` and other Intercept Layer options. + + If you run multiple designs with the same options, set up a `clintercept.conf` file in your home directory. You can also set the options as environment variables by prefixing the option name with `CLI_`. For example, the `DllName` option can be set through the `CLI_DllName` environment variable. For a list of options, see *Controls* in [How to Use the Intercept Layer for OpenCL Applications](https://github.com/intel/opencl-intercept-layer/blob/master/docs/controls.md). + + For this tutorial, set the following options: + +| Options/Variables | Description | +| --- | --- | +| `DllName=$CMPLR_ROOT/linux/lib/libOpenCL.so` | The intercept layer must know where the `libOpenCL.so` file from the original oneAPI build is located. | +| `DevicePerformanceTiming=1` and `DevicePerformanceTimelineLogging=1` | These options print out runtime timeline information in the output of the executable run. 
| +| `ChromePerformanceTiming=1`, `ChromeCallLogging=1`, `ChromePerformanceTimingInStages=1` | These variables set up the chrome tracer output, and ensure the output has Queued, Submitted, and Execution stages. | + + +These instructions set up the `cliloader` executable, which provides some flexibility by allowing for more control over when the layer is used or not used. If you prefer a local installation (for a single design) or a global installation (always ON for all designs), follow the instructions at [How to Install the Intercept Layer for OpenCL Applications](https://github.com/intel/opencl-intercept-layer/blob/master/docs/install.md). + +### Running the Intercept Layer for OpenCL* Applications + +To run a compiled DPC++ program using the Intercept Layer for OpenCL* Applications, use the command: +`cliloader <executable> [executable args]` + +To run the tutorial example, refer to the "[Running the Sample](#running-the-sample)" section. + +When you run the host executable with the `cliloader` command, the `stderr` output contains lines as shown in the following example: +``` +Device Timeline for clEnqueueWriteBuffer (enqueue 1) = 63267241140401 ns (queued), 63267241149579 ns (submit), 63267241194205 ns (start), 63267242905519 ns (end) +``` + +These lines give the timeline information about a variety of oneAPI runtime calls. After the host executable finishes running, there is also a summary of the performance information for the run. + +### Viewing the Performance Data + +After the executable runs, the data collected will be placed in the `CLIntercept_Dump` directory, which is in the home directory by default. Its location can be adjusted using the `DumpDir=<directory where you want the output files>` `cliloader` option. `CLIntercept_Dump` contains a file called `clintercept_trace.json`. You can load this JSON file in the [Google* Chrome* trace event profiling tool](chrome://tracing/) to visualize the timeline data collected by the run. 
+ +For this tutorial, this visualization appears as shown in the following example: + +![](full_example_trace.PNG) + +This visualization shows different calls executed through time. The X-axis is time, with the scale shown near the top of the page. The Y-axis shows different calls that are split up in several ways. + +The left side (Y-axis) has two different types of numbers: +* Numbers that contain a decimal point. + * The part of the number before the decimal point orders the calls approximately by start time. + * The part of the number after the decimal point represents the queue number the call was made in. +* Numbers that do not contain a decimal point. These numbers represent the thread ID of the thread being run on in the operating system. + +The colors in the trace represent different stages of execution: +* Blue during the *queued* stage +* Yellow during the *submitted* stage +* Orange for the *execution* stage + +Look for gaps between consecutive execution stages and kernel runs to identify possible areas for optimization. + + +### Applying Double-Buffering Using the Intercept Layer for OpenCL* Applications + +The double-buffering optimization can help minimize or remove gaps between consecutive kernels as they wait for host processing to finish. These gaps are minimized or removed by having the host perform processing operations on a second set of buffers while the kernel executes. With this execution order, the host processing is done by the time the next kernel can run, so kernel execution is not held up waiting for the host. + +For a more detailed explanation of the optimization, refer to the FPGA tutorial "Double Buffering to Overlap Kernel Execution with Buffer Transfers and Host Processing". + +In this tutorial, the first three kernels are run without the double-buffer optimization, and the next three are run with it. 
The kernels were run on an Intel® Programmable Acceleration Card with Intel® Arria® 10 GX FPGA when the intercept layer data was collected. The change made by this optimization can be clearly seen in the Intercept Layer for OpenCL* Applications trace: + +![](with_and_without_double_buffering.PNG) + +Here, the kernel runs named `_ZTS10SimpleVpow` can be recognized as the bars with the largest execution time (the large orange bars). Double buffering removes the gaps between the kernel executions that can be seen in the top trace image. This optimization improves the throughput of the design, as explained in the `double_buffering` tutorial. + +The Intercept Layer for OpenCL* Applications makes it clear why the double buffering optimization will benefit this design, and shows the performance improvement it achieves. Use the Intercept Layer tool on your designs to identify scenarios where you can apply double buffering and other system-level optimizations. + + +## Key Concepts +* A brief summary of the key profiling tools available for DPC++ performance optimization +* Understanding the Intercept Layer for OpenCL* Applications tool +* How to set up and use the Intercept Layer for OpenCL* Applications tool +* How to use the resulting information to identify opportunities for system-level optimizations such as double buffering + +## License +This code sample is licensed under MIT license. + +## Building the Tutorial + +### Include Files +The included header `dpc_common.hpp` is located at `%ONEAPI_ROOT%\dev-utilities\latest\include` on your development system. + +### Running Samples in DevCloud +If running a sample in the Intel DevCloud, remember that you must specify the compute node (fpga_compile or fpga_runtime) as well as whether to run in batch or interactive mode. For more information see the Intel® oneAPI Base Toolkit Get Started Guide ([https://devcloud.intel.com/oneapi/get-started/base-toolkit/](https://devcloud.intel.com/oneapi/get-started/base-toolkit/)). 
+ +When compiling for FPGA hardware, it is recommended to increase the job timeout to 12h. + +### On a Linux* System + +1. Generate the `Makefile` by running `cmake`. + ``` + mkdir build + cd build + ``` + To compile for the Intel® PAC with Intel Arria® 10 GX FPGA, run `cmake` using the command: + ``` + cmake .. + ``` + Alternatively, to compile for the Intel® PAC with Intel Stratix® 10 SX FPGA, run `cmake` using the command: + + ``` + cmake .. -DFPGA_BOARD=intel_s10sx_pac:pac_s10 + ``` + +2. Compile the design through the generated `Makefile`. The following build targets are provided: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + make fpga_emu + ``` + * Compile for FPGA hardware (longer compile time, targets FPGA device): + ``` + make fpga + ``` +3. (Optional) As the above hardware compile may take several hours to complete, an Intel® PAC with Intel Arria® 10 GX FPGA precompiled binary can be downloaded here. + + ### In Third-Party Integrated Development Environments (IDEs) + +You can compile and run this tutorial in the Eclipse* IDE (in Linux*) and the Visual Studio* IDE (in Windows*). For instructions, refer to the following link: [Intel® oneAPI DPC++ FPGA Workflows on Third-Party IDEs](https://software.intel.com/en-us/articles/intel-oneapi-dpcpp-fpga-workflow-on-ide) + + +## Running the Sample + + 1. Run the sample on the FPGA emulator (the kernel executes on the CPU): + ``` + ./double_buffering.fpga_emu (Linux) + ``` +2. Run the sample on the FPGA device: + ``` + ./double_buffering.fpga (Linux) + ``` +3. Follow the instructions in the "[Setting up the Intercept Layer for OpenCL* Applications](#setting-up-the-intercept-layer-for-opencl-applications)" section to install and configure the `cliloader` tool. +4. Run the sample using the Intercept Layer for OpenCL* Applications to obtain system-level profiling information: + ``` + cliloader ./double_buffering.fpga (Linux) + ``` +5. 
Follow the instructions in the "[Viewing the Performance Data](#viewing-the-performance-data)" section to visualize the results. + +### Example of Output +__Intercept Layer for OpenCL* Applications results:__ +Your visualization results should resemble the screenshots in sections "[Viewing the Performance Data](#viewing-the-performance-data)" and "[Applying Double-Buffering Using the Intercept Layer for OpenCL* Applications](#applying-double-buffering-using-the-intercept-layer-for-opencl-applications)". + +__Command line `stdout`:__ +When run without `cliloader`, the tutorial output should resemble the result below. +``` +Platform name: Intel(R) FPGA SDK for OpenCL(TM) +Device name: pac_a10 : Intel PAC Platform (pac_ee00000) + +Executing kernel 100 times in each round. + +*** Beginning execution, without double buffering +Launching kernel #0 +Launching kernel #10 +Launching kernel #20 +Launching kernel #30 +Launching kernel #40 +Launching kernel #50 +Launching kernel #60 +Launching kernel #70 +Launching kernel #80 +Launching kernel #90 + +Overall execution time without double buffering = 29742 ms +Total kernel-only execution time without double buffering = 17856 ms +Throughput = 35.255249 MB/s + +*** Beginning execution, with double buffering. 
+Launching kernel #0 +Launching kernel #10 +Launching kernel #20 +Launching kernel #30 +Launching kernel #40 +Launching kernel #50 +Launching kernel #60 +Launching kernel #70 +Launching kernel #80 +Launching kernel #90 + +Overall execution time with double buffering = 17967 ms +Total kernel-only execution time with double buffering = 17869 ms +Throughput = 58.35976 MB/s + +Verification PASSED +``` diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Tools/system_profiling/full_example_trace.PNG b/DirectProgramming/DPC++FPGA/Tutorials/Tools/system_profiling/full_example_trace.PNG new file mode 100755 index 0000000000000000000000000000000000000000..92d37fc5dc8788a23480c4e212f5f91066a09eb4 GIT binary patch literal 110050 zcmbq*2Ut_tx;D0Pu;GY`ihxKPr6~#s2v|^vG({fCNYg{5#Az<=*o@_ndoqo=uXSz1LprtM6A*~h7eD?0oGG)0Bm*y-d@T$w31K^rP6Hpdx?MEAC?r&xuygh9 zHsG`H1Ebr%LP7`S1jia`R8lJNKqWT)E=c`|!|}T{@SX4;W}{ zyR`41%<;V!Ty)k$(F=@BJp^5)Z)pTu(@0th(lZ~{p6o5US&2r^)#vEJ4I9|pM&XFP zmwuG~$3L!k^C^xL8VTl0i#t0f_Pa@4@5MU(ZK{M%4*R??9$5$~A`1pFk;n{}yh`}bdgzn?Z2`WrKj^^t86z;ceCiUi_Tz<|ptCmx zy!rl@b-FpHT%Wza@&e}&FdE2jQ>(qr_b&ynFV#B=*3-QkU(;u{37GchpZ4X3 zC$V2yL@ANV5r0D)zuADDD(l^rW7dtq4`9p(TR}RXwSwMFk3HMwR$|XEx5BMlX|*?b z8Ib8if8?R{=P*J-y^nd})y2oGDZ*%}@Ozjbhy!-LH4Z9esXx@yQI)q%S`?D+p1I3d=_DkemH}%W6{Q?m|@9132~_wIXAVi?Pdx(EEHblKHWFg zs*q!6OJ1Lqbu8>(1l9P)UX1wD3j)=3LI_ko(&=ArJmFqn9JAcUqgb zRo9p$1suNR8Mv7KfOXkCa?9MR{X1v!PEg3;0qgi6jDI0af44a0lme( zk>&aZ5Lgr+{Fu^vIqnj(3j`6Ak3ly5H7$B)HZN2@)F%f~4$qpOa$?By z*vIAXrRaLD+XoWi36WxsESp2O=cl&HC%0asX1+`rC70a(gw>2uR5`X9mRowHauV`>4wCBXUYI2mp zjIZi#-HUHq7+SzWjKH7)Z%?<~Qp1P@;V>3$GE?)FJ+7gEFuaR;lD2?EBk%cg0VbYpc(M zynVNAW6F*E;m@ABcJg<8zno5Pb(V#BB6CB*M?Mvs(jc_ftD|Q!lHZK4Y2~PV?3?Z< zCa=U(v#(LLcYuu>uV4mwRtlGAWf?AQTQPZ**PK#??2iuT+Y(>)!Q$129i6(KTyx{w%K7l(e7UWclUdrY+B>fBAbs0+t}&}z1G?ix(h2GOrG_s_Vt+bBYCw`@b!a$(r> zDjnR8fcN7SmBxB2ZH1vF{39)&db0*53x9UdL02A=u(qR^DdBQ$L1@|*Dg4(JNf)M4 zZv~RmHrL^3EjFqW2;*1wdqT@^$$=7FBA0Zedm(7Up*+kfq`F7!V&bD 
zX`Px+1I%~|CuGH%U`rvbSz{Say94CmKRW-ZlMlznurBSym0g$j7>IU7NEWcES7~3l zCbxB4=kk zmf1L9xbr78JmaT~{yetR{aq8y%&kA2Oy?p$*E-MXDb8s2yWVPuhu!>ffLt#nBYzumN1aAh2AU9 z`%^qMmUPQczgTrkHfX;tmE1}iou>-VvjZy4>*Bm;2_N@WTAs;6TG19yrpPj1z=8#& z`Zb%lT8#|LH?Azuf~b&e~G_5hfv?~6Lg=(uzX4xNNR$kX{GO- zU-F7Gy%vLzFZV4+YNX~J-tnr&|DeZ;2W8cSS~Uu%$-@+S6aMCvAG+qwWv7tNjiI7) z#%%5T<_ch7t{mjkFM|eekJhK+6GA6#p-}m+NxqHhSj>jTQI|Dp)TkBp!z^+|rOq1b zj#t1@L(-?y3uc*%pyU>_7XN9yeXUQ>j2X_ZN+t|{_+w?~Im@DhF@}eynXz*2s#V{R<2vp=xA9;SJE_2+Fh1At z3`7n>q{0;-F$lk7&c3}6d002rLmp-fvQ;}ni10q+ABUV^7?AT}!TQNoNR5>GT$;?^ zoa$52PtS&Zb4dFWaP?y?n{};j_ua+=(ChJ?3tzk{F{an6=~&4d-kNu{2eZ1F>*xMT3FjE2ZOV=uUGci#Q^8|j#Us>#+}Hm0;S;X!{S_gUM2@CsC;{WzVRp#aVd&rx z!fK$n>In^_c$cwRNuy~;7-F50 zlhjLzN0=Q z^sJ(jZz0pUM-zm8UU=ZhGs)XIglLW9+ z`hjC;vr|Pf{@O`*+ivJZX!p#(5GG-FpQ-;3m^>UZb(e;rRKO*U!6)zeM?Dlz^_5#$ zraQAHVDhk=hf~VBnejjUO**@z>z`ktnVS4288|t&Lq6HT2>b&vb&NHO*$@S0wnmZ6GxqYlm?2a87U*_Cv zDd|$O6&|`P2&!bw_1L#Kk2e4PTFf*X77yYBBkArOHP^Xv{>6#E?3>WT&gbK#!JObD z7a6C4fU4*I$u&ArlH}n0GU0rUw&R|^jr7H2Y?3j5VrYE8D-n-UUEwF2KQT8wGTp28HZ~bQ%9~yK+rxpkFB5>%iq_bz|2X|n zPtA;swJAM5mUTcfe3kXekO(pNPfiFq6fdLuSCsm&c&ce`ls_Br z=3w%s^P2U)Op3lRXe8YL_1XpDjO1WgQ{17?a|9+d6J_5J8gy=<<4luNg09Bm)H!(F z)tmo)xJ6zQJO9<>eeNF-vRy3jhB6z6j!O`i{e^KRD-l!B-bGNP(u&T$uud4f^5%nU!VfTChjiYWx z9ziS(KRFe$JQitO&A`^a$3uGFfBg7yYp4GD>Ws!?1E;R#vz>PJuO@m~N0{-nb1R>d zi4qG9yx9zx=Fc^{dw46m$KCQK>6fLr*Q)@km$R=1@A_*3mJs$Bw|X*L++nGP66dt= zNmHb9G%SR`KlO4lx6gN>-EmEArmS6MjE=^2raF2z9~djo;m<^H9bSb^vvW#<(S##54F4JeEs$!{jWEAJzgCqAX-0pmbJ<=WSNkHtb<_-8Kcj9 zCbI$#BIkIc>uXg$!dfY4Jr$|5N30bly1c2$!>!&iL#MBiqpOCGO>@q*g@y6o$}#~$ zq+vA4PKEE+S4ry;7y|X=P%r~CmYlQde2!k2TgHstsakicZk<0TLU9a75W@D}A3gH8 z1&P+whG`y{TIpZ#T*rSSRMunWNDzrI|L;_|?g&OysciartWan5cDn3cu!f1>aY?iD ztPhW>Oh_@ZF#YC&FiDo);+ZbeaS7x2i_wG6&45+bbj|tp)}s2@hLlr0g=Dk!^kxCa z+*7~2f3mX)7tUu`gsm;)MDErYI2Jk=GRi0n?Yh5yB(fzLt+^pp!`?BZ(648)Ol%ucj(a7iA{nC~)7%Zdv^knFS_~_5hW6G67NR0XIGG~x9 z7h5}9J7Lf-tSWWaPeN8fwdrqz^{Bd}ahFWQ`NP?0e3@R63k)GBd^$FkwG9DA3aPW~oE_t6;9AY~B6@Nwg!|i~prd 
zU~>G4%CZtPcKyBP=u(z2)awhqiR!-U9X{=CQ)>kfl~Y%s-*_mlyXDns=!})vx(C_2 zg^1OtoonDlFnv+Pu6U`nZ;@k;>ukpuzDM`?TEk-A>tWu=Qp4+^ z7PLS2v2s!&T)9GW3F1Ocqc*ZJUdr^6^`SKkhN+-J30&Oi#_O%2d)U#95mZ+5XDWoZ zzBae6(&m@xE&kJ-PC&^?Rh<#fQ_>lpP*S60Ww3*KXI8*SkX;s);pWQ3->MCi^3JGQ zUkvI!cT2_NJjbpX9_&H;&dCc4fYJsliTJv3-|Awmv!aX;-&8>N%WlBs7fj@fv)W4R zV5Ukf%sw+-cUj5#wYT~QV>tsYukv6ymq~`TlUI2?-ryXt=8YqBs;~lnac8!UV%n{W ziM^ZwjzVugA+8-39_)Y**A4d%Jy{OBU#lBEO2`>nz#$j`KVwCw zpI4YHy_&p8B7RA8bf8-C-2pp9Wvp1&v!xPb)>gh_jtxfE=$)?|L_P)C^fAtu-DR@R zt-T$ln=Yq6JKJlnbb7*{t}sixZ|OOHogu%^8Us!l4mcdFryb-u?o@$D$K&}?%PZu` zxAKO}bG>VTCsug_^C61O$zaM$3)cf(UvQk}&y57Eo?}v4N=QYPp2R~-eeOypvh;F< z;4eofr;f=oUxtm!^J*m=6@A&K2{>=0Rpoo#FVAj9$J{*3-E%EUOEjeg?Z7VnmMg`- zUosptnI%S`dDg!17%aPIZ;V0PF-KVO0kKdiuY$D-DGY6`jnH$YSEZqv+5U%OX1zW= z^&CFKT66N4QOJC7sZ=t-kwo;YWomE+vc+ju2@{jJSkDBRFg;)P@cEqL@|;?U#}eqS ziIc>x9sIka_hOXan}>6I$Uafppc<*w-?on_dj%Pnn$QYOUYj>6olNWMajJk?S`)&V zsw(D0nQ;QxI)|DNhl|DAQypVD1$_5)miGr3Si)-vD{^GDe6k~33f;5To5sZxzUy6=2jC~Nqe;(fZc(?-5&-iDTda0f za@wtEe&r4O6%3;^qG64Nm2{lCcw~;)R)X8FE0uL#lZuPCMl=&8Vzwv6x4P!;K+O90 zm}(w41L?Cu9CG2c^Of#_pgGoPJ7a)$B`RSh%<(Wxr!0n(!O4Te+=$P_aoy7k!q>M` zvvRf9*}NBMMg))%Wm3Y{+pez)s149Hwe>2yh2W>PvCFAY`@mM zbp09?F{v|pqMzMN*ymki9zHa?Jdu!d%;!m41%`Xwk)=V%;@)foCoBatAF$q!ylLDw z*iaD%$ULIe6FYZEqCj^r>uA7XYK}pkt`wMBT5LBFM=Y2GF{qvt`vB$mck!pgvJD1( z_xCWpKakwOX@Cyqq_lWV^pp;gWGFaEqJFy?+3Eh1wU0!#-o(O~yh3X8xGGGPQgl9S zBb24Ckr$1M3y*~FcJB-Ca1N`i9ZYzGh)c$E8!Uv&d2@}8`w<1XA2R(@Q$J;B zdX<>40p)_Rnk%hbjT#E;Qz2P|n_1l6;JU^kf8^8G9z3-T?2tT>x@t;1nqW>Wb|Oql z+lz7PI z75cjU=gwyP*-w}xjNXUW4ZjSNz1p*;o(fn$_$O4K+OUQLrAii}zf?_%3mJ(ui&qSb zkIJD&p z(ijCMWa^wPwsC)4uI0LA*yzlQmoA~7+^_VFGMB+fG-<=5^}SnNS!k+X8nKOTAl|q) zo9Wwm*c!{WZkSJLRyOiw-ogfB7<%o<0HQ>|Py}Jx-#r(ef=;Mx zIsNjH@i&Q3ypCrQLq{@4B8^&P=G3!GU&rh&F^yqJD;B|^69Vf2v)IJ(B6&r(wZ|*0 z9d@_>!t%`tQ7JxpS+M~Cx?-=*jWHRaI*gL?9V`A5aheAij;rHhF&g_G7^kTyQ(Iu; z+R_&bgDZLfix?zOv&pszmQ6UTVbR9v4Xn+GHprST=xgTRP7ieRjC%}|zuZrCzp^yK@XCdHMf 
zZ5yY3|BPNg6##&<^mBWkac}zG*me{75AdRhQ^JXR%Gb=mZs9$?v|bCb4O>L^!u!usST&1Y9kq7trrVD?phj?~TZ9_7?UL2>o|Cggks zm87G)r=m}TYB)$zGSnm~yuH;kd$cdeZiyxlz~dcU#|HT9R{LS47A8OazXADz}b%cE7$d(Wb5(kO3o{p!Z&NL0&`#B9+GnoDw9UUi{DWaVEaECdHUctpQBC zlzk%ng2vO|b#K^PoYfE!f1BvZ4t6N=5SHCOab^Sl0*zVrR ztNUb9aqyR|B!%T%02cr?EQP{#&+t)`jvYLOIJ??s#f>zUe&}bq?|>jew)Lv7IJ6pl zK=G;M20EjAi!^zoh^IV^4$?XO)!MjFHri3yd3v|zP$Gu1kT2e^b08iN6f1RPXsQkY zQR?Lcfo7j&S4EDUBRl;yOyRXgeLJrs!Fqw7!P`j+XQTcMjKu%QUevFA&4{rjjQ|+y z|2s&u-s)2pFp<>dl@8%U8$swlsTFo-0w|9a=<^{}tMlzmQ3}~M9iB(6WVgWunT)87 zOoq;1$rYi8H8S0suLsC#XLKA3AN`>oC;az1xM2ERVj*fc@D_Q*ugCPSI?`8U0azHn`FZ(hpp=RwdU#; z%3L_LmyGOvfeA%E-lJW3j>6{JO@4z@?8O=Yaeg95dyT!0QK1*kyxT5iN0X6e$9DDr z533H`{TjLeQpnn$Z=NH0P|4gq~VLztRhX9i?_C+;3h_zf)ki8?#F)U~~VSPE_TRJJ!^}U-SNss4I zwKNjaWT-cv^m=V9QfiNJs**5KGQ@*r(yemZ6^Rn<-MO%cLF6+x} zsIO9mr{nS^mX5*4W5{b@ zgv}YRk-7&Rr-=~<6s;oGS8~=*tc_jp+P}CdDPjglTv?s(k9=*#VK}bW3^+8N;a>NR zl<@!2E2!P}{oP23gM@*5G2h|?>T(nBa=R8_ULm&LSmPg{Qa}z?9>RW=x7PielN8S; zu+`XMpKHhS8<}aPReg;8!eG9HQ9Hi=HKR7n(H~&iH%?_^ed0ZuS0jS;swk|$6G%n8 z8FbA!Fv$fp!?unk@4Pok4&xBOcpTVjjWAl_&xJRhD`1Ok=RUZw&zkMH&ZQ_kgV8r&W8 z9_q-Wjt4wVbdXEMYmc(EVml4%?Iu+qaAhMjr)VnTYf-e@(SLyMu9 zio@_XY`p5((WEfH5ypZ<7K1AhGHvV#u8Su_r7s1i*pTT%{m>ouH1^2Z6g&sK`cWo9 ziOxQ9HAk0TexRcMGtxNPS68_Q?vV6z2oUbP|7E-6A;L&7IFS^ccg9LeJzyXE3L*{STX zf4VsiW^5#poQ&7bNzkSjjz6h$nDJADsGqqpxelcHV%8T|n^nVZd;}we_InwBJSJ)k zMqJNbM^Q4m&vc&4Eko%pc3qAU<7*d)Xnjt6!0xjalR_0Mjie@CluyO8N^B5`Sdo*y zmk3#?lwIW_VT(r$Ckb2MwBbc6CUZPLz68d*GMT4mM}hkC+*&>5FnU}2pa=O*=_6Z| z!1%)D>yC{wEJDOCwNjMjqBhr1pL=6e7891{hoPuDQiAx{>6}o!eVwv&eSD{GGCtb% z5FKnU4}lN&^UUmVaOF8GaxCO?iYa)mL|4VK zZw_zh`G-D6A}|Hl@!1NBLvw{gPS2F@vHILBLcW0A=~|d(5Hz!`HyV;1uJJ zbgAhIeVWQhzwZw!5Y3C^%~f|!%3;>ZZKU>0KZ%DqF&B^1O}&C|Ebvd-$+hmUr63s9S_;|utM{ZbF;0T&w;`ey~Hyn(=pxTc)d)|~z14$ftxMgie} z#I65I*?Cj?F8cNa@O*ZGNc^JaC8=lhIBdnu6*C>?{xv$#&NS1=5;Pexbw#U#XEGk zgH1S}-K6NiiWaW4lXq6@hF1t=R%w*mU)kdMq{J5wG{>alwIkW;dcjgJ!>M@x;5ets zmwaC1I`8*erf-p{`o>hARsBeR&@cwoH>pB*>npZwHo2O$)Nj^k7>p~{`8s9xu-aBd 
z)Its*#V3?y7Wvx8uWd^7n+UX27Uk>ClB_oMa;rvj4O@zn-)gYR4?x4*+n@N)@WGFZ zcUC);6aYnOAlk}^8C~O4HiyfM28<$_rA}O;BLO{r?4MkGuwJWgWpp}#4A4Q|x6k2> zZe*H-BKOXSE7>1#8BAGBs~zHbu81}J7X_(%$cwUTX5b86w1^5*gUH!b zKZ2*jVC(6@HH=cB5J+#O_*!+T#D!B;NS3yA)U%}b>Vfyu*{OA9e z%oQAqo&9cY&tA1bUW1 z7iL+P7zES~95Zp_i#_J5dYLQVBK421&8x2a%;-3?U+XKEt zB18dViJgn*>#t9)$08z23}(W3OtCeN*Si1vr8a=(3fY18mMMS{9Jia)fwT%tkFeEn zG?1JgUDx4c;l0z>LTseoe{NF(2%~on0aP4pG8v%?%7>i=vM=P3%2?-xuj8Ii7cgoZ z!Wq1+OkghWOu?=<>=gltG=6ar?VB}+l6!2T1A> zxu6Lh2ng2$Xb>CkPz!yEMi^FgK|Qky_PybT-RAAN9fiSnkb$r5C_EuHDS(W0D9A#Y zx<^1UZ`tB`_+EZ%M0nzB~( zG*Mpg$!}>7PvPjL7n!%n`Q7X_v?Ko{&f6&WraYwQL#3AdfaB5C&GtQ0aiuqG6t4^>8W(qHL%(;(9`)+H82qmAP5^y?TLJC_|Y^yd=D&u z@k~1&RFsSAgCYEMvTO3V{=Go5BL(2cWURjM@ zvG@0@yzOcZWEa}fe}*uY?WRM?L6()hz`1a|Fp=KehyosXAK85 z*fDoW6COR1Os|IwTM)hb*ww|rBql9`VU`8G-X_s{Tl(By35LJr20}0Gg|{Cgo_zq~ zZ2~;u&htjpNxh}&if5tlqm&o`p;AGwe_sXS6V&0RG3y4=yM97qu5dW=9sFprg(0Z6 zRX(G4zUepZcqorqi5uST{Kt6!s`SooNfq2ZQMLHXMG5_nT?@A~LAJ!nYoz2(^aerH z?I|4K4U!N~N^z*2QaAbAjCn`u8;fo(X5X4KWdOsVu_Q|gzy;#SlU5tVnL2YFaS$w6 z5sS%khD-(t2$!h)E7rEe*AVd zfLRNa2>Y==snr2JAO+Ebc$7oB8VHDE}BjG;F4UL<0hKM|JtcDceHXw4l-&A z)<_aWEOGXGq6MxFXs0f=2{*OVI6S-uSJWPs%U^^#3hK=hsit26bPJvF*?Zqno8k*k z9s_VGY^JPr$fsR7&UViZrg?a+FWhovkQCNG;F4SoM9Si|N4HYbu#D=O)^9yIYk2Q= zd0!y2uI{h(GhW{w7{s!g)gmrlTe34Q3C}mM0G=o>Q%&i0NlqGqQZ1&Mpu*xHHAU!V zjS+d3jMywiQ#-m{nye_;YoEqfdF9{F>;RPo4(=y=YeEK{7q6B<+7BgLLxSu}OEtU) zon@C^)ZmG%;G2mLp!BjRBf2^qa^HS3*v=|ImiD5?Lk$^XYEO0IBP!THklp|Llj-EH z8Z5*6_HdHN_nQvqs@YR|=4+1sGT7^)c`Dgo2$(4dxL!2rC~(c_)D~CJfvA%IM*Fmn z`>MHrP?Q%0%*ac&Y}`c^#PcMVMNOGH+)R7?{<9?(d$QkAG|lv?Psw+hBk!5oY8-AY z%~#p}3n$N4*IM(i?1zmBJDdJR?4(`5!&{Bw)!|w{;#CC{O}==k1Om)WtH1Zl%dRy) zds3g}6=Z2qd(B%Ds=8qPzY}F+<+eKzfy#GfK&~#PZ(C>r@$k)yGR|Tkoi8y=-?t2* z{x~BII;fg)Nqbg6@)08q*kPynlgcmXs@Z*sbo&j36MG{a%K*krdh0>Ac>Y{>X~>r-OncU11a2)K zvJkr-WhwD%D$4{Mbt5CF*vJ0ITPod<@g722M33;vU;KgApahs3lbnu6`WB1rD{`6W zu^8ZNmkhj98ppLxr}w4_Uy385UbcBi*#gOg0jEB678T{vpW~WZY=^K}*Z85Lg4r7x 
z*Uc#?WdbEor_B~{FnvHy924itao1;^25EzNRr*mz3{w0J_;JaQI(?wd@SGYZNZgSu zXt%@N#Ym`%$z~3ek5-9{fDB#bzc7IHnGuWmbTLx)q#dnnEw;+x|1P}p`~T0v`{7>* zZ!Exwe+cj63zg?m(bv2XM{~KE*O}XW%B4IMAadu`f4+Q4ozi)|UKVYp(s~<~o2R0L z459-~6htd5y!9+BH>Nbo0j9d5aH7cNyr|VBGiL9I!JDYN>~veO4%3!FZRDW&#^m&= zIHD8t7A{OyYJyK4z++*(X0-c|M_v7o7>vqERz$ zkA=}}kr~s^18iNd+?VqP4$I|(;#+7CdndALS{&#Lr*7amd94DkK?uhf&s^)JY2Bab z1W_yNnXqPy3B1!jcQnf$!S1^f4&cs#sbeA3~ zvhgu-F}34}BiftC_Hk>+0}LEMS|@#yJYcL1tm^)N zRX^1~^`!e;pMqz9?6YjxCJr|aFJm;BcG5@6#DC2UO zl07{FDRk!5VH|Pu4Q7RBxzR*At~hPz*|T!y&AY{L1__`fVz7#w_v=y))_5TO=UAek z4DFEMG6&en#ZX+hE>4&=#wx2w6WcMuyU zM1Vo`yQ@`_@7kGos{nVT_e}Q}+-monIP!8LYFM@SJV4{qwz2GYW0MJjMZ7BncQu%d^|k-6)}Z=NXi zFSgc47WbR+o5|KNlSA}U&~aVEhcB>|FZ>>{RvO(f$v<43o$U4j4J4nfqd8mhvBL3u+8yt7h z1(Loqqrqc;%b`)1;tbfC=AHJtz|T+m`wa5IRrg<-RoSy~+Uq^gI0ya_<%# z8B(*i5a@&gy2>iDWHZgKx8j>b^g}&#QlR=sxcs>am7@IM?pZ71dxHMAw_AuUxed1p zT+U0zUF2D2$-Zp0NJ*b5hl^hx%ArDa0ADoW^pRbl(Vx}=89ycYSRmwsg==R;EfE6% z%XDa+ZId18XZjt_d!`I7d+68}uP-jHpDiSuWHjyfU_ z)Xogtg_$J$t_xihaa!%fu*Y{_`I%2 z%03OGvsM3Ak2*p4u4Db8s<&DNB#$4F_2>Y?{EajFkKDRXy9u>M2`*iExc>f!90`LG2G7R`7xUVef*= zX@KwS4&zp+f=UIRRWSPF3p{dlHacV1zIy@}04mE@78t38(hAEDSEF;<5l z#+u*$-#9w&25?l^=M2!p9AZ=B2Pwra9R~{SCsQG|?@XwAN*S?cF9j&;e+lq3kpRGZ zUU%&~zRT4l1KEs;!1@zLRlqC_WZ5rj&f>AYe+Jrz0Y|FBEM04$1`j9Q-v}TDJil*G z`IE^TH~l@{DHG2(A%_7)zs*8Z&tp^@&V>TmED=Kq_$`9u9UDO*{}9kE;1%H%uO)9Q za!N_*b{Xm%1)dgbr~CHJH=xdft7_jhsl7`Fh}q3->H@vb%~nm6v4xr*jifmQjsexT z$XAoGXF7k>Ut=Yd!-bkY2+Hw-?i*)@(9@`ug-NmXqe%4QBBwT4CY>0*Q|NN6$|(`u z#p|T5nNA>zI=0HCGp!`Rq~s2I*p5&nGkj7HZHzV{cq?IKfl zjllj2Ey~@g?kt;GfG6X-Z(xBnbOm8`^t|mO?;7Iz*xFBhC7xzEuFa=4OXTKM+EsGIsUaSNALcdFY9*Z)aHXwICKfMLC^S4d17?DJL{dUk!hYRFK zVu|?Qw#h07Zuc0Xl9@}up3ScdKiBq40kf=syV$$1PkH3Wh}-XUY6FCGFXTrrQo zDVC|>nZXx%e&$tN+qlNPNQrcU6b zOv_+_l%@DX?O#yrO-RDh zKNt3Woe!~JfYm4$r&vY1b&;+l&jiHPTRaogM#U>0XJB%IIR?7t2)z>lBBh!QZrUV-XUg?j%JvHQqw zK3t5{o9SGBai=^I0>k} zr6yGd_^(Z2XYU8u>K{>rn1cPaGj73p2_E_ApP@ZwT~r$%whJ+rAa57aC$F;ox%+=0 
zHaD)2J9G3K2Ra3|f?(fv4g|e)Hfvik+kd>m_M&3(HLCw3D?Gh+Ca|}^?LQwL*vguv z8{*HZFV`IQyEy=K%``QOIUaTW^uqGlB$WtkrgJGbuNb-ZD?t?R!oNnKb_|x^ecJt&}!^V9c7X1KXY>|b{UHoqfFW~eh`}QQ@zrbQdqcBp#MpT{I zs@%P0U^=@{mI<^P5K|@zXMnANK%(;oNHI!=Rk-OsgrW7*VVS#B&#(8XII4Sw4S@hn zrseNqcbgp{9#u`!@0B>*}GUq%oSDB@tdh7ern_qBFQ-Ry)56b_JJ zp??~?|034s9hEF-(kFDBT4KljiTlc_4Wbtn%Yfdsr=tJC{DEcxg)M?Q_J5$N7s#70 zIQ|D3xE+`jck_QB^gyAqOH39La=2d0ral-R2xLGhse9EjyEC?A{j>J7=nAC&r1qHW z^A|VjLE0p?^cD(SZKj)%W{I|9jXc|h$POHj8kc(T<6+KuV0U1%zU*GLnT@(nxY058 zj^F1dC|%n9q&jA&;P40~_b(2NTd@&!u-t89H=Cfh%!xr)P-*`Kwe7*yjl$>ua5qpH z-&gj&f0C4=FYm?<>@T_9Z0-AjcE|`UWC;AGQqjQ=2Uz<$;p1{f;D2#YgY(5fia#j| zL(5&{oGMhbGSKEgXUMtDAmjJe6p=CGb;Ssot8LYxjeEnEK2mr8`l#x-!~1Hpx~(AN ze{tB7fbG=oT7cxd@tY=lAOIo=1mF=0^FK|)RjpFT0k9m>E=*3#iyPvjJGAohQ1!D< z#hdcaxIJ}gw>B#@)`Gu-G{24e|M}U*JFQooXbf*VvGO%8P9^?CB@U0J0Co|mARmLE zw|QTwi}1|~6h@~0i-VAz#Ap}pQ#)&HHF^=v1gd29rML0Oa#nD>1xWF<4ubo18&JL9 z3R6;m*|r)(oqQ~TjVXEd;^>4t3~11%H~(`>F5&|*Rr0bbO{f#R1dp0j%1t+d!Y~)wpfW@oJ^TdccoDh^B-4uOdt{G!T!3QbOaJVbS!7i8uTY zPG$+Yzgzi#XEo^-Am}42D5vid|}L6#~fekuAUvhX`D$Qdo%G9UaJ)>k5hZ ztsqWtU zqFXA};!k9_WQa>u>O8%>9R!Tm_dkiuN7Jtn?t7B)#}AeU4_)g#f=I%<;*jaK*#8iGO{m$+vAObzb71UAr7K9Y}>!}0QMKb14;$xunE}NYTc0;*S=i^ zVp`%~mIdm~yrT}TD6J{Gjdz(KwK|Go2swXXjDJIJ^P^FH7LTl6`vZNjH)RAawI+^Q z2pcr9zR{nb^|GS$GhOaFupj8x_P+iID31E!i_p#8|CTrworJ*bEA^}fFUY|vA6WrE zl)>DJxes*fyL)D`Se#;0yFI7+01ZbHz}o0nwr{>x;%8Hj z>Lv*W>MO9YEQ3?=)6Hec8iWzEu4up7=VgcAXgg$VqM3+39TZzE4C4B39B$wvZYnD!frn0{w$s>JZ6K+=gs~O3jdu!KQH@Ss*w8l zsdK2r7|@RWeM%Oe7--~hI+rz+ulUG(`;)wH=g$~Y6BGli2C2)*wt)R zgGyzOL(5d7@GY=_e82ETSKgCh~v!?FnoZE?d&_$zS%a zBI0Ru`9DYp?Xfv^U}&=YZ_37#KMV+YIIP6de*HAJfw(YM`U!s29v4iPlk=I^Jel6@M)#2(h(0FoBON_?P5J8}@k+$AUS{p7#g! 
z#zZ?h7Tep@dw=)W>dF2;(YKENi5>!>!Dlt){+qV4IQIPuw;x7g9Lf-5Poal~ZYC?j zJ`k5pt3u!n!Lvw%hW9Ij64ECH?cI!&a5N4a7z7%$@ zJ%hK0tQq7xAjZk!^LBO8OOFx^-)x`r(DZD@aM-!mg-6SZosX7Dnrvrr9hKiG?cV%K z5qqIm`TqQar&yU`*^S22vo1{THwj^{hopM-is1je;^j4E@C;24uVTyuVUEqzG;iQk zrn-Y^+j6n_^`g4bs)VSFxpFw2FTF0Z+4dwswZ3og)C)^1d$Kk~MMMKRfX`g-2&}MH zi`I?UTjx*HIBTO9E7Y4at&696KNr7S^%OaTent(&@g$%;>I9FLwbRns@VNWOFnfix z`3-$MUMcT;(V0X~*zZZI6Xy)^1&%jK(W9+S#$wW>$UN|{XM zeI7$$S)HXJzNJ&WVdtopwsQ}-_ixmg{a3k-sh*VM-8~h;+f$%$G^Cu5Fzx=-xvJq= z(4-XWe_5_ckfT1Qafulh-4D%n$E3Q{-3xNK5=YUd#GDUw*xbQ_yS_0>z%V!CmjKj4 zB0>$yw2Q>pdv=U$_2c3rIQt!^BTA) zLU;4zBk}Rf`mopIQcP5dN@I+#7Wd#Onos|sBuSs#P95^LPHo)8v=nB8*QoOSaml0^ zvR;vvLz1D+f7$l88a&OzsO#%+&903$g$&3qq4oO0t|%c1Si`KRT{;G(D|Shj?`6O1 zzP(m8JdOA_9j~sEHkQK85Oj=bVWDY1x}D3tkj%Eo9uh#;{z~{OoFpDV=8i}bP@{a9@Z*G>DzMF8hKUGcS#;Ad&4|tMH!TLa;ZHlJpZtMX!@Vhac$OKJAFQk!k5MG zXK2@}@iqjMiNv+=S+$EaZx_&(4~uB=(a73HpprOEN6G;epl z{R%QB+^SSVk2<@c(kkgll0`SlW8ApcD&Iyv_5dviPq!2xa5ZK94^I&M0DCQO*iX@+ z5MyO3w)=?fbrq`jTK!wnHU`p7TGw;YGK!q48OA?r(NaV1gEPbZ_c;*~i?nmjB+gs) zTE)%{tvy<1Q5ftixS^o|ZZLi>JM8r_pFLFXwANtXh}D6a;OHcQY7R1%LgE&$=?$6S zi96VhnPX}?X#ozd9o1_3U=Fq+qjaR@F-W?zNAa7dcN;Kly`uNFOwuB~j@fq}ptP`k ziLHHBS1X1D9kCI9yX~|@i(*yXA3n!0*E)tRQ}A5AH#ckXV+4C`7n3_1+n2>QY&U*U zlTz02XYFyNNdHm&YzkYIFTLaM|8kv2HULYVkT#dzy#);UynoYA)0}yR;B>|!@@Vs+ z_&~AF-N|;V(qE+3U8HNp-rs&E^#Dcg?*DKTQ9;MboD(JIltP|!w@^ z&x;le=os#c&4gk!eIudsH~m`^bjx5B>%j-4XYTeUbn1CYMaBtf0k$%tvifA_QrO&X zNjVtwpPp$uU=lOc`%q~dH>nqbb=@i@k4Oca&OtfT{?XNrd9c?B@Q+xHDl1GaAK_L> zucDJ#E>bG3z_rLK>)-q1zZ?gl{eZE`vx^z;?^_Mkw2AxL7u$RJFdBm^9=yJYcUg~P zxA_%EPWTee{0vxwgm@#7(=O`(j};_!+q&M>yRxeiRG$q>+1Se?i1Tj{kp6430I*k( zRUPCDg=y`iBpg-%3TjAm|MNkgM5A%3)~M7E679Xxt76mv)M15WEd3YM&~`r2XTSZI zuX{SblSoE|71d$bB!$p2hbTY0{f#5vcp7`TIZfxPRiYOb!@^SzkV<3z+!M)$bWxtO z`^o_#IPBoZ?LF1=8hG}Z$j_3-4Y?)G83~r}DE8LNwb?wXUftM_Nbu%*5xOZLBrPzI;6-OiniycKd%S~ zz>o}~LxrflDqEcC=Gi#x{4af*xc*_Z`ncQ&_@v4|Vf)?P`!IH#w-hPTj-{3Bs@;9aHe37UfNOKIS||xeXu9jLHB!_>7OKnr$ruu0Iuue!0iEWSnv>LA;dx1T@_KTleIj0Byxc_BdD!Yo$oAI}j!dP| 
zEAv||=`LpbrmoxO(74XmNSRVsW7o}htImrSxz>6?%$syo;Ar?0!Zd{n=Ds4Pm%QIRGfqj=vco=W>{x*J z-n7%T4WhHCw&~tuSGlajep{XBq&_yKIX|0UQPN##nMl<%oUhLBUdxHe^V#?BBjJEI zrS4tbn=!c#{!Nf=U1ak<{ltdZq%@!AD0wE2Ha(^fYGc%4@z`esw}N z|Mvn(B`0eh1Ad?GHzV{i!432F+7$2E&?BZ)fkHn`?p*wR7pIP6LTLVD&Dcz-p+u9W zDuJ9nUp2vT_@enKU`p`#yvBeS9UW;b%yTqFYEQF!2p@7KG z7=enhbu7)^$A!>Rko z@}7g&Sup&n{0A_}L&hfieJ-&E~ufXE>=pmJ5CQ-z}2UhEjs3XAaP!NdhXhmZ{0QGa6^_e1HOpcXAAa z;%23u+RO|rp;o7Mp}Qx#Y4t6Qm%_Hh2x<3}|K_ZQkY~;=m}rFL2}TH1;vXS(xSUmEHpKGlPa#&TvfASA&4I zbB+E9Eef>zZfkYRC3NhAbLD+o5!M-|^l_&O#~>(L?ct8^zG;b|{I z8=w6zdWE(DhOM>*i*ce|zol4HdG_Ew_LPB=r6ZzxkBYvvzz^QR<1xUqm7@McU*WsA z$`oHFp0LYiql`T_n}hAoO^>Tvv@xV^IL|j{+ZYI-!M|^*4G6mzAp$U1zuNymdLqyR z>4zSOcW4A@#U6+afFlk5XeU9R=@k$c0G%kf`2EPiy3`T-54i<&uo3AP^sMf!{lUxl z&xoPrYaG@YWOWdRd{6(VF|&LFKTaF}Z_p-4*AB%P6FuL(0g~N@P2A`E25)(b%+}L; z)>?IJS})7BBBo5IzEo)c>!DNSydfPpX7LC=NXCAf?DkP=b=m5!onffw`5;B+aK8IR z(*ynQKo?W%!sWN4Sgd(yYrV@>?AwMn8+G#K;D5F{F@GUG(psR(AlweuP8S{Wj^y=A zcMLBs1{$b0KO<;-#yq(ff$yBZC<{gm3yhY(3>*+p((GS9vg$GerAIdw%QVhb_7)lyu911wSop9!$%x~GFro`L3! 
zxZO;LZWEp2^$T>|Mcr_u!tGPJS?_i`myaaQSWflp6!XkJgC|KRZBK zNI7l0-j-zb4qxf{_f&@HuY;>%hc|Z3n$R?CST~X!yeF~dw{@5$b6l~0qOM93p40cZ zE%`;&AObdi#*z69JbHGJ5@Wq7C2rVP_dz5}qXeGT#HxODwBTlvVINJ1>?OK{D`} zef7E^&wl&dE1!9kV{x}r9&g#hyM~g|Gvi0Zn0c=u8}!>S>S{Ul_$L#N*dC6CpF0^h zbIQl(CXQ50%)hIzD;qZ7y-kLbWJJ@(Ve>AR?vQSqO6r+6kPKFuO;TE)Iqk#AUUrJ9 zmPqt)ds=J4X*_jXXH(TXs*D4vZ))DoE;-e*Z0?IkHs zX7i(6o|${h?>1~UZ<&%waQVCKT%F?H5oXJER7R?gf*`e&>@nGw$eMYfX{OwH?yXpv zANOpVZnvB?=Ee>kce{Bh=LRCZ7mN~jCtR7Cm>k`%==`L@oay%S4`2DKoX5z1NykTc z?v>ToqNtpUc@X$Ab-L-NYlfx7zk{l76-fNVnJ4M!&eXIQiX><1mRxi5B<~m%=AYbe z%^AUbm3pUwOco2`>0(C^eN$b2!r7rh#i_A_|Jbae7s}8iJ*^YNRUKSs`&=;wq&iF< zlG?;p2+6*SbZro_jOyc~k0dfJc}^(Jy>r;S^W;g#zGLgxA>~Ri&3pRQS_2iT%zP`h zVs3+OQ;20z%pUI0=1ZSUFnMc{t;N8V<`e4_`<^>Wo|Rhb+Rv>2ZItGYC3@{Y zCGD+f=Mi-3q~6Tb>lhFS(hHJ(W@N2r)Lo}Nd7>HnbxnZt_w(D&XmOwMf2L>QfM%XO z4b_?kC0SA>OK*eL=qeVZ$-d_a4o>2=*YvWqTdL{7y>vy{Nsl9iT#7C#Jc9fZ%}2}LBstgDs8HiA z6WNW&V)C}+7}7`mEyGl#u~cc9-o5O%M!PM$#h*rV23QtT-EuouE1o~jqpxM3gH4<) z^D&f6W?PFA{+~#@jiejDYHmnK#=v+yr`NvWqR|1%Emzky}0uW#T3!eR+=y^ z&LSL5klxq`1*u#{x3(l2)?Is*!2Mj(7^ksS)6O5qru7T4Ntr6ji{ey0NpKZLH*3)4 z!|>Yn2Tu(VeS==wW_0~SLvxQw`_*97q$}-T*hJ&&sypoyB`+TMAO4DIBq=6B=)hm? 
za+q+V0KFTrUT&jDsxh`B%DO(;2bXnzS)jEW^CdT)n(@Ak`(- zqOf&KYap8D(oLhL;I%lzbK~pr5o*m&)2p$^WHX3X3zP| zr;ny~-v0fcg1U42drx)O3FTAK2C*1hx|V%+gU~y@Bb1Q~Z5Pa$16${Mxg^W69`(Xx z;%HEAdv1YO7D}s5k!&D1ByVNT5*zH$Duuxa*hRyh4=UBsZgcG$jLHr2>b4)5@od6$ z=hY1#d`@LGE?oJ>D!6!Gkq^zQJUG!fD|s|6r(Zc7m_9#R{M#j9@gKIws)mzr0TTzT zRWxiWue0czN{}P)THVxR_T2|DHPcTE=rZ1MWSD`Hp)Q;s!Gw0lyLfePhGrpd!NXTd!mAK9^w2NjWcN zMoe{~X;L*i@+LAJ@m`hB_tItTPhrysl?Y7}`O(GsWQ6zzx#P$I%KG)O5AAX=&Y1ns z5`3lbX}94s@D}V0o0B`AJU#t5Mh(;Ks*#+krWwizw+*j9n~OP};evQ~PR38fo}Ac^ ze@Q)-=Di{rxIO@5n z1~&oi>fZp_e%RKDb^Zuk{g1b~F^#B9C4gOECbskRos%1+|7G^S@WPr)t zyomFE`+C)gR~=#%{ah;0IA@ZRe$404l>EU}TmTw01;J3P+v>2p|Nd!_LKv3v#9;^1 zOLwZ%#P^U!X$tR%VsCLzyZPKcU+&>eWxSotesP4RhN2$ZaRE4TtX>EY?CtcCOmjB} z6+ZQW3DNq%Mb^&4S zW8}e4p&)ojyGo|x+#wzf=aac6h930Xo=Q#!k%@+GHi>%(LY`lO;1R%#WjQXIySwMF zu_(4%euHMR=rSqx+>OMI*@WrpTZ-U!{mfmkSG6kXC4dW6;Jx1AP@t|C>sFkuPThde z&KFu65)lAYQD#1c{j0N%W$GbQJj~cJrO?)}_Mv7@G}sC*Tt8X}hn<^!SAngBjg><` ziR9N|a5#=_yAhC$;XC2{yybEQR1JazQb^PPWdWMyWj0UqCo9?5+jxNS3_sFapxs4H`&c3 zF|p-mt@d-~g^xw#hP{e7-aIECmS(|(0P<`HYOVS&-(MBgCyF&f_z9E-i$KtL^P;O4 zmH;%_zDl1G*Pz5aE}9dR*{m8HX>eqp4QKdmvOSYSK)wlcr4rXAeLs9G7e{jR$sem= z5TE5L){t$RH~bA)@b})opdea4{tp8e4p_67*CQ~A>l2pd?HhA&!~%dG(NY20FNJC` zR=`5v7)ZS!!Uc#im)4Cni4xHD_{ZwupreQwrV_33EV9`}LXqYwXH0A1M~>#lMor++i>(T)s%AfS&))b9)C>CzEoB1H5>F4ie%@bDCG z_!JOg^@xIPuoA#Fgu`9_D=L;Pd}FYzYz!sn=4P8DGS$J*SUT9gQt(8Y;7Mz>e((N5 zb4*R}Fud4`k4@w=L-jFu&ByXDwt;=%C&?FHv%Se`mGC_~)t5sIMes&FQ1+vAt}Av_W&CeoJyXH&D~uBV5a)f%o73 z?Li85iFp1Wo;TW_4^udbH-aW=El2JTSo0FY5T6~8HsQu}MD+R5GkuLd{b7;7KWqf- z^_##W$Hda>UyA4Ed9v&QTg2+gs!MEqi(Ph8Moxtk)p7p$=o2DA}gVd2!A z-b|OC*9JO5bpD4%!YsTB9BDc__(bvA zu;s-+ewHmgXbW_2wqNg6ZVjMaf4kN0W2N58d$7%7eg$0i)B4D?Ejk^)88O7n#>m61 zEWdCARGI;&EvKlI2ez2e!f62KJ{kz%hxqjk{F@~}Z-AWr!Tyx(%kn+vDkx_%A>~hn zst_u}mDZnPzgw~HJm`u`E7uBnX^P_5SNR(w!3YIxjy$;par2`*Rt)#kiv)wz28`9? 
zI(xY81r*KR&qkPU7ylmh`!+_a*&)68(3}nC`B>KX^JyrE)DaZzQ@Cf(@LL#&1yjw) zL!&82C}n@b3WY;9EEYN8iTI$3^w2DV2pY306>o0X#qv2r@0#V~L6+LO35+Z2N7_Eo zzokM+jeM}0>7UZ<$~DCJ3H!jEyKp)|pHGx^W3fHL9Yc~xQQqPTqVWxtMv&1d(G-u` zOT*;R^M2NMqjc$K4Z`oqKC~g(n7dfySl0&g2>Pg#sNU|2yn^q(Qz&Pi-OjAy_Sv6q zTwLI$aGfR%#mw^iMOfTt`3cok67TfJ`(wsrY8AgjQn@dfPM@t>3_OL#tuw3Bz~YAjLm!x^BOca8C$LW6^(_>QLSM zY18i8J70^RPS5K$E*i;ybvvN4rhuua536Wt!%4ivX>UA_EN#nwBA;qB>U#-k^)k;> zDqG+(VfI^2N&lkl)NJ|SJv;E*uM1$jx|_}2D+oibFMQcF{;Jus+i>44FL^-kl;0LW zMx+U)M`YdaqKC>zMji|fADy%hQolg$sxzPT^1gi?M6*S5FJ`;Z2|?OC2i>4FKkMj1 z|NF!0>qXjdf65>{(U2P?x_#?+L=aursz0B(b{x}4pGaM|&|dlGQis@Z>VWXu$$w$C z#x6d3*s&e5w@Nkx?9S&7{9=OO8&Bj^ThjV{XA$S&$gIVyDE&7#=H^dJe0V)e$K1ES zWHF9}Ge!#c5T!XcatS00%UfWnEA{P-lgWqC`afI=U>@Iv7=pO&PhVPu3<_Kx=QJo5 zl6^ya0*&_!82a=@TeTnpKt!K+Tc$i@|C`_X<-pbvkb{b=tZ_qU+raG6ovYmA6iEA? z`wL$4Dd9P*I}Vh#EtySsPf~2=x2@*6{1~`v93x-@nQM#Rjg;LWVqD06|34)KKh#h7 zLDXKz5Eo<-KP^^N%HeOy(?-`GOJL-jtGzJedskyN7DN#o7VRr-Z468@Wxr4iRc9du zFp~K%K|>4jLG(#TTu5IO36KjTXYZ5is#K7_YqOT!2l-v^Va?$4-x0#!t#UMavRsC@ zd);v|@mFAmg&+E+OoczF0PvU;%Y6sX2^IgFRxz%cCRM;{=N8%K+v@8)W4Sl zyv+{?1K*|*$yI}ZAG02%!lK5Op|7JPo}vrP@@uH468`CL47u~iwDKby1%WKPT!RR# zg!S_U1_7qM8&jP7p0FTh$NT1I$II{BFy6Mia5%fh(5adqnFG-S^RL9F)sKk6Xj(a> zT(Or}Dp%|#QdXTjvX9Ezq#HNr*l}ZtC3Gj^Fu6B$pZbt5M%e5rHDpoIhorjS_&VV# z)CQlq@t*6aG*}h}FCd1K8O0OSbDb`&T1RPV+zi(;7QsyjP5>yALBnH2`iD>@_*KlG zwrQMChQsu*wtn~0VM=5TMp@-=lYM7q~nyNXYBj27VmDCre=dfS=FwT^}LK>b}8s zjHJ1YqBk|SeV~ehvUMnWR|E;V#EE}$)t)`8wkRXqc1-wyus(y(TOD)?LnvBS#Q)&A z4{cB&MKMx2vNom@kE=%`DH&R({cN~h@hxv?$ZO3zaxH!KWUM5*_AGwsC^p~Kf#Ni9 z5VSs0U=$cEC>YPLgIQ(wUI@7WzI|iG38EQB*#%mT!YB`j)9ja88r?JE5hn%aPWZe? znXd+ok4Is%ffi}M;t%5=nTvRsQ7}%*)|K7Osbs1^B!NzSLEd%9>v~YN?>Ui^Oa7p@y%EF79Y&}?E=i}xVAyg8Id*49SV2miYb%5 z(ZSK@{qk;4Xp_UXs%*CIjb%6Hbii(Bh1#x$?emwirl%!y`bTdz`jER|4v-;O?c~G! 
zhiei-{&W0c^wRPDu^UG!2Dcf`Sw%&!EnFSeA_X5 z&lSZ`m!HoVknYNNkCJoWg{CdaTHn_W_vE2|cnCj)_7hS=Y{h~o|Nf>~TN< zj2aipuxKZg$Ka)=YX?5zvyVxe0m)s>Iw|{p5TckYOI^)9ct0o-I7ec-PEXq=+_$$@ z;Hx%{rVTG}uh)O5JFgT#2Wji}_f5dXvZec?taIO{OW3>dSRr81LmfPLX(4&^H#E_> z*VmY9KGKnooU<_(YqSz+^`Wtyma)nd6>{b66Se53Lw1WgbV% zf)cm0$j?`CGlW?GcJg7)%ZDHed z&?S|aN?Z;brG(URzVAt-*Lg5X@U@P{YbD|;h*R{4i*&FCf>pVQA(VyVrT!OFe=Ufg zP-C{smLAACv7O%kNQxPCPGRy;(rxV!en)O08KkUvH2c^I9QnQT zkz7{P>2!pijkop`?v2XUBY&L!Ep1O@K6(xFm;=hQa|dtFK>kpTGfrFinM&j!!=jMJ zbr;uS*=W;ge>vS7hKs0D1}AFz?UXi-D4aR(HjJk7L^NRO=ZSo47X) zOGWo&m9H0`K1L}!eQN<|yWQXkCIPHQI>R2;F0tlDnsU)cgaP*JcyjzH!HwjN7)UsB z3+R~t4$ne`BJlWbLF5J$M*2g4WO33Vfk)!Z!|%cC02!M zIP+Hi%qgJf7#7}nbA>zlFTZ9Dvs_LTtxio}KG_sdu|4}f*j<%tXi(MzrC}MjvFFF^ zx5f69T`{Uljt9mu{_ZLszCRn7^{(FMkL>zORg8t^ zmYmipEnCv*C@8tm(AlnJ)YOG{boVUq$|mQGc1fYrtJI6RByj9!&V7KRK=Ui=c5ne2 zKAc$f!q~JNh)fysxv%PTyRFva0~fW}dl(-;BGt z&G)f?y<76V7X-eHTupzAJ1lCvf(8b;PrZ8!qRz)HFwLj-9r+T2ase3s(*-7i=DzSE z?0%}Sr(wzaAQb1Px09>-W|&5S|lmF$J@VrMeji+6!a|_ttQ;T z$RsimBEF`M$LFO->8sww<7vmNs4Z2Mv>l4`6s1v9{z`7s&f+H0Nc5MfIR&M{y zaE`6&ZyfcnR)*IztP(C*E@&$e|0Z`yWJ=q)jPP3x;M{8H`S?Np0-5D@t9X$NKQLGl z+-^_xa~|}Q0@JGVV{%@gwE=R}?-KtY?o>TsoM>ElbP#9kC5Y+AVda0ogy)!0)20|h za|1?;XaMxG&{cgqD39*1i`5bNv@7>DS0z>t0baCN(d)8vFwx!*`ZgkQK6CHdr2*tz z`dbGn#_j`mDlnlg7^tRy*mz6XxiDxJ8DzM{N!s$^rIkmF>|+6YT*Nqvr#1BbsU6bd zPB}FDT2~luwqh9tb9&f3>Bn1NAiLdZT75vX+tD+xxOj46{{A@ND2-L(Y3HVRUiNRkr1^!0j9}0zKqb(2qkSXJa>SM zd5<2wag)8JUkRyRfWeM#7#h<9#gvfc>xNjFOoZ+M0D}PgDfBMB{pK9e;7qwSh^9MF zye9?5O|6?SN*L-}ikBWY@i(bk9V&cjNc{{4A5_cQ|ea(JsT>jWktkCiWXBa%c zx(Wy+HFCE20kxMKJKrHwURsJdI@PQ6Zk*_2HdDBTsoLh9$yWaGuJMgdY^dj;s;WTX zsKrq6F3397d7N_>EZR z21IV?*6U|*^#)z0Iq4FW{_!fm+)}bm4stxaF-nCq0S3byju;e zA(%t**e2iUWeu!jII(+coM{|-DBF{#@Hpv+VEAkj)zkpl$yv(KA~ zn*h){@lSNazTQAP8kE1)HxYmeQLBGWV*%o-fHgnI5uwAihi&99#PCK$=tp)Mw>L)B zoINK@G6Dv#|1*tW?F2Ef>fua*dKDMBcb5zNsQ?F``TCxQxi|BHb+}IN#BhC>c()Z% zh+Wkw_AY;IPck9+eV+k9gMMHY0oG z!F9X1cVUOt)_@sP^}XoZ1%%N)ZiS-X;{=X2cUi`a_{?s;vZ8akw+ec76Pp`Uauvzb 
zZ70~MBiPrilh;wR_M}d4)2o{C zI4;)_Y!!AE=J7>MDn6fKv!p}@d1dAYBQ49ry63l# z*9q3PTXy_La76TZJE?GK!Cgv4-5gimUgy)2ACebD=XLhpC$``5vn-7HQ{#%gMx;sx zb!so~zQ|i+PiXh&HS3KGf3vJ}%k_p#t5?H{LS;1z@(QPK7*?kARnFD4VjBNJvhjnf zwtS!naZCUu2*P>d<{a5)(yhk;P+!ET6N!)?Dn)HX*}lnDv#yNt6%D_)Q%0klEXI~3 z)KF)mzLHp@R!P;BLz~Zp@Mll7R}+7Ey9m@SL@Y~+%1M=L)F&tF(g`~u=_aj?_jKXe z3ruHRW(*-Vws@OQY1*_3%|?lMF_$dh*h~AvAQ6YppEKVT&#Af}mT$A8k6X87t6iSY z)1%Bb=Qi*g;XK9$9VChSABcc}M{%MGZJJY`=hVkt4?4k_TgM24ptW#{aM+-1&2mnm zQlxEY_Q`hk-2%$Bzhcr#YNrG5*b8EuHr@|6|M}JG1^y~ZHRgJY)kRZB6_j>rst<n%h=PFLGkxAiT31M^1cUe}}d1%XkUY6x|Zf3->3ewmDAP_cTDR*|(g! zuY1%4Geo1~^)sjMBza)OtxvtYm1Swu%=&n9&~H zCcwYWUzzQWfKcy*_!t>oD%+l#eP6w(xN-r7yX_4C^HUp>c(S;o9pg9sD53X}0OXwj z1b((p0e?3`Ca78^9t?5DP?s~MuqtT#!Kn*rX{5K{qF*0C%&?pR3`H_^p386wEgO3- zq@RdS+d(XgLVYfJ_0j`;)E|Yz_N!l@0Weju%54jEMJCKrG0FhaDtNdo zr7Xa%0-K!3(1&m*%hcib-B`qafzNCMpxMY&Gsl-O$}IW;wA+7_H~RC_#k|J?(v|5_ zW8TY)9cIVgvVGi<@4R-Gt+mn1{E12!9uLn04(5yZ@-<&pKLsMb7--s+FVITJXuql* z2!j?m_`!7+XnH@uz@f|{DsydGZdX23r4oK%#PWgimg+4?Xq;>ubeDkf-)7Oa+>1ro zLNDX1hPdTG%vm6~!j5mwJ=CorEbGB!v~CH>eO7Ro4$QM08poBzNB$Y5@1@)%94Th9 zV~5+n|9gJ99x><3GOJiq?_$jwka=Ht`&e_LA5h&{R%|Ql$eiUf^q~y^et+YvPv%dl zPTgLUo4NbOx2^KyD%Nhb#V!eFVU~s_dHtEKjY=IN2ITb6JAfT>Yz2G8ud~-af<_`@ zJMCi$Py~WW&HEw{QB^dGeT=ODRXEAzxbkgSKCIxO;x(|~!uSE2*x~Z*1eXd`bR0KP zz-iAKTOY@PIH=sxFhu0F<-zh-v^FSSC&)l=L~hR^S|x@QGSDQmz4a(UY`bk-OR8_g zj%`KwZMvP%WiQybelh<)&%Kk&c_jaM>0meFoT=~t$dcc^P4&1%5D4xutafqZy<)F| zQMTf^ynKwA3O|A6aa`Oc0d5*}$m1R5S@gNiir6$G%K01smIL$t*!G3SL3%J2cc=Elv3V(M524k48T~K>D8W|ORh?tqj(o6 zXj}w)heh(8uMR3!rBUw0YE&W9SkSMv*diS2mC1F9vJ=nEw#RzCXngHA0Mb^v*GUVM z@PV1&AjOS}_nUZnF0y5a)bRP<^*NK5RU4NB>>U9JY~CMtR}i=ZS#C#>W?6P9F+ZB* zH`u&8j|o&_|9d|v@9E=9s)0q7VohJgU~@=g*dt=j6#vr<3kxkiLgiC?5g>i-T>fON zY50g2IqlhIO&fenPybAY#6RD`qdT}qY<_xWg>9Qmv2E<@6fvXY*XMg{O?2KeKs&pM z+(G{bIr#sYKE(uglr@sBa|fr|`b#(z5wR#Hmzg#{u3#E+sDVpIth%61FnFDOo$*#h zuV+_!=%d}Tv+X8xiLYJ7rf(b!u8;Q7f)v)SMV*|;X3UsgSDwhMD8TGxV<;F5bIrZe za`;Wm^QrKf{Au{uLese%Wz_EO(y54w`=t2FIii%gS9eQJd95)s(#&#bsGEouH`nq? 
z4RLjtsJ;!fG<8=+d}s>p-k?MI1`y|0IGg-FH^FVJ|Mrgg^|M~%J{Zn~`7$37%g2yb z&s8{&;e0N55$EG46%?N6H|0IbROlDDm`h*nI@{4U(UMtdEs1uSo#>$I61X+LwU#)J zEVP+;WtLpqqTUAWxTL{6Gh=;jo0+u~SogD=x=)1~y^8s6%=BTWtF z`Aqw_(pb)1L_YZfkMmI$jKY36X;-4dqs*@>L$xo>VpIk12qz8DgU#FXdse${;Eo?} z&T9BW>90rkrrk#)J+8v-6Mnx(#g2#UNQfYDcTGMal5rtaETZ5=^ta$j^Y5Y6TY7PC z1CJmU^VIpS!KRA2+&ZXEiEYN_L`T()s*TjKS8ci7Nb!2^0gIB*bQ4w}261?)(48^u^mfG8*m$=`w&N)w$SUfG7a8-(i8nh9S&*Vs(7>6as znrM*KCp$YOZf^e1%$pTB_UV-$^|^qN2}LX8NTTopHv}^O92YoZs%!6yv7GWh#TF+% zFk!*CZ8vLyu@eyrnIxFkG441G2@0C1cj?D=jIBTbizQH7k78dsIqTRhP6JIZ0qkt7 zyxu%I7)T}f5k}+iTC~<;m+;qBK5zQXGyC_(YR{A~e^V-;h>^&xKNT&cBns)z%;?vuH@CYKf*Zp#QC7)9lLpp87ZD^t#=OQ8F9kOnM&{A zv(?yX8|zjSKg@Y1bh)J}{sgQR_wb9VpHq2apHB-jQl*~9+U@s{io7+DPP^~G;D2hH zL*-IuPEN*XB^Wjn+8?X&NR=0_vLq*yGpteb&U3_usT1c;8va9QGL zA_BEKDpYs=EPIRxoyQT1ryp%xQ>|XOju>$#d#^yN@uOb5);_YlcNo3<*R&u7*Ws}? z%7mZ7Y61J7;7=|0#7e`Zi#v&plMO0v$vADZQ+bMHy?aP~6EJGkIrGnGh*x5I$?6G) zLpLc3!|5YPV5H6Q9i8tRH9k2!`Eq`u<>3CxZd4js%wSZ#LNs7fY3`ccJH(_3MH$4_ zj)~i1GwqE;e%c1VY1yh$)Q2A|sWO%mFzp3b9iQoXy=tRsuq)6GkvcRH#A$CdtVtUz zv~@ACJ>AcRd@#j%;VWa`#Bh0WPRa3;N*9pNj-bWv*x0%`Acqa8QpOpH=zQLit*(� zf0hg(h&Z@^iUFV5Y=jELkO6t6f=i0t4vE*utzb=(aF3p`h1EQX`&&vau#$mR ziYLK{4GddI!=vonKj^_cc@7s3EFa-W%K>~%K1mni?fK|=B@6;N$5t!xn*N7N4^nS| zGaD>x<*q)m|3E%o3I?^E^m=vC3GbCwdcSM>pK7|~ohEPnT=82rjC?OD5rU7ig+!i~ z(u&LQDc`FCKq36;j*eHMt`#fF(j7faxi;RQ0X-lDQRo z441bt939p>l=WYM3;NOwB49?aL1d!ZH)0)?0Qugd3_;TZOmtKiTLk1wvgIQ7E~h(cu_QGf|Z&t?3H(KTdxqA{e%n9Pw5~~0$*4%f0bIDeH|#EWbvv?53rcdeMtykQM`xPnq}Cr? 
z6}F%&t^zMhc+rU1`kMnepj&X7vc_xog?ickKw;Gr#ncLmuko?IUgZIGisxZiWW|fY zud%Sc-k<>%x)#*Aa-Z{k7srLTgp=HNF)nj=bFPmNbP?Gk~*d{*`tx?R|lp8%6^2AoDh$U?9?6 z$Cofc>#yOgUX6;s7Yxxe)IXoIb#R8Fm>WW>*Y#oQZ0XJQe(sfE$Q*9}QEXO7!ePZ4019NxVyCzIcR@xpZnKsLI+4KSOaZ9E)hak-LjCvvQNFIpPZN zO)t|A^ldf7SVuTz*GZJYQDGMSqe7m!PdbhBS#xi^2;xUm@7|KOsJdOC%ijMu7u_uQ zHy1QMzUBD;(U;wg&QrIfY>QXXkF}uEho6#s2sBpXq=>1yx-JYq+Ic5AoOmNwT%4Jd z7Rl~Ki^|Qs&-Gr~{Cz6?i+4+5sj!oepMXD`=XBC5byU|Al(Q$*gOhh^-wUGJ1cIAN zY^1FMsBt+w^f-fbUXD%44Qx1*`dzlYId0;4fLOiCGjAtg%=oG9&WG7GtF|FR+Qt4h z1z36LwCH=g1We@rc+lb_q+k#v8d2PGOoNh&(fGOQI*iG+XN`Z$vIhYtzco-tVmNF& zLgg%P)-j3SoC1S!CO;jdYp2vH69$cIa!=6QEYlfJZAw#{ z*wB(j2&F<$nQGX*i$J|z-G*)8d~h1DuWuQUy%&X_L&R`LY)V6KLcXIKP(@YBj9-+= zuiA~a0v7w_-{JU18TdCzI8~WdH4e5sB2e#XF3Q@zIgkNSz5|iDRIJ{))7=fsvdTxkx^pbp*6mev+q{*07|H@$4I{5?DH+9zajGO`^*WNM=)@UsUck(l*S&hRm{Ef zIJgQiMwGNh$=kM&<5hv6zhE~V|Jw^+ z`43{_-V?U?OWH3gBW}~vL$b*!XAp1wDcS1kGvD2){x_7PR162C0w1AahUHrs-%{R1 zK-075>l_0NNSe(*?m^Xf3PP2Rh z{g{8dWTd%dvdo8O@Th|YmgfZk5SIzFWkAl|2l`gIR^Z`z11gC4?X|yhW?tirylP%L zmUC`ke)~4>ynho9?lZ&xJEppxbpuSrZpSH>@n z;exRtw)u1an*s_(eakPbYvo$`2S%1=h%9|N1Pc2|O!uDA_}&o@B?n_5Umc}%fwInY ziHs_Agkb~Yj6WZf70~{rVc+wASur=rC-gdwKJu~^0ha2%TS3!g#b?^IM~p2Z`PS)* zT*m^ZgF|HgA8qd))?~J|4?E6LM;TE@0i`M&K~X@Y_pupt4^*rTG=BD$`&jJLEo;X1R&Z|~@XL|gZm zf)XM^AbCHp@Smk$+0U=h(3&@Xo}unV^n<21#0D0|tgX8l)R5@BJAEX`=$JuQ-Toa9 zaay)dqF9dEnNls8^#CJ7&oKbBF4R}*XBRUHf2FewN)0$zXro1NduvTZ5b)0?hSZcV zndC|2dKuJ?GgLmJh3+6R{Y|JOXBLZwoue1LgZFQ`u>A{o|86Wm{`&3Om+x(j_VohK zbG%(DGOg|LZo+7Hb^m1%)&$+DBLOR0`_vO>98T!d_aR?32y?knH%&x!`J~~9c>ITF zEq}w%`rJ*ytuD0)UjDeeJ7K%O1^qwqKmkecz^lDa9-zGo8K9w875p3xhn4*d6On;y zmqnd2I47rTO@DBwHU|%`oGCEq0ismV@H~nxhW}yH&%PG87K9EG@%!w7-tL+>Vjq9q zkUr}qHMV)c;Cy;0t@56<#xa@1>qx>5(1t^zUs|nEm-ZnsP8YNJq2NzHgIgM*tl>xm zqAkOijnTrQhu{UAgvaSnjOw3=>Y;SG=05p-XJcjdo)K`I3z`o66;P?LY#o!{Gc zKENKQHiqf`7nMQh<4y1tW&H`tJP7?~sQR0n{xJIY4B=0{0bl{7g1N-EuyIT9nKjw5 zLl#DHqjulDrfr*ArF}p4Y5nz&haVl#zRI&aEqIUD?R18ZE3NV+zCrl1HNbF-35kE* 
zk^w4~V)}31X`60txmM|5>T&uER3iiUJxJ{LZ5DofMghHx9MY)A+0=CRmpmM#G5mLf zhHjwIuBz;yXTUt<{CKu|%@I$or^+JHfHdB$eB10md zRyUA9kpDF&`Y$j6&@ezj*o3Zh_&WbT1k~T*LP`}20}8RDaY!&X%e!!$s@uoX8=BQo zK-982bDa1E5>@7&fXfzgZZB|_xso$cM_&Qj=^I4&_P^uWk#v5&L1l#9Uc@gd{}51p zyG;*VwqQr??irJN<&T{^duQ3Tl$e7}(__87T)&1ww^K4`PoAndmM;9_B0_+?Efl_5 zH{9IDIi4^8+Is1Hxo@Gb&iUojp(%I9NM`ALMlkbbA83DwQB)IvsH7i4{_msN?NJ0; zwBRA_S!Fi}$kL4qTPL1?9w;_q;{N~r0QuOmoZs)%np}V_D)DHiCx~>?I7jf=%`nxp ze{#G3jtM~uDXKa;^o@q`y+&-xt5dKBX4RX+QoGsCKJo>3S4L{1NKRiJ?~iqEYj!gE z<7;OVs16KBn?ZfAnEkqYAwr@j3}Sx@DdL&!ABF@P31ua6(oo=xl`!W^u`~XQ@*dNffwBDc zJ)m9ozt08&i;cV=V)u8HwM}lFZ^KCx{B_KE;t=`<(XhkTr}8{0LQZjW>uafB#(rJY z75v+N7Dfyejn*6nr3+Wy9o-c9*fZ}9O8x99`gc`*W++jw-yn+DZZ`fJ_2i5m`2yv5 z2L5M%e(|_47PF?;=79>0IQzJoU7-kw9)2<*54abbmfjMa1xP;wqFC4GY4&p!(BKLs zbKBja3}eXZVy&A7mu>zyhyuF}EGN#+9;j$u!p$X2cl&Kaf&z9vgt~nO%BF?p*a1}Z zS|1%l?$ke5YXd*6y^DN|7{Oc*A&7rJx)vKYxs08lI$CvttCT}|o?4$jk|lr6{_&B; zcJ22T+qKU$MgtN3m`S}}3|1D#%BYhQ!Qcdrvg!Y00Qha}A4UL!uW9x53d?^tfS(Si zz@4w?9DtMlsZ8?wnFL)&0~LfrrJFZ2g1KB}3%P#UCxAvL`7TVZs}s}x*^&atG}k+{ z$b-h0NYY;%=MruzRsyu*l36RJJkoKh5TNO-OHkX|pByr{mE*ucaQR`uI9>oL#{%^^ z4Kdic?ZC@y_?fMgcC`==ZU)=+nhtd5-&H;TcHc6%b3(D8piK%qWz#>jWqQ9pp&x4c z4tgx99yHE!7|ZwlsAqJGQ38U{SH1F~4Du*xTjx{t-#P<1{yOk6#)GmgY@sK(Wt97& zU{79wC3zT9x6I`hfe!HO7fjyenJ@h54f;0O_5X5vLqh+(Ye3r%wS$M6FsWJH4XLii z$^r$zx02hXU~JtS)M^Q25FqgKz0dcT0Eq;y(OdKf`%ZbaZI!>RV^{M#D1?kZ>jo|s z3cAh%N!O3X?aaGFf}+wLw5Nw0h0)E z#xJ`-&xECM)x%QI@#x*xP;_~ee3yL2*1mNAS6#aHyV$Aw_wUDp>nshq*|huBx$;De z&Bl{isXB3qkx$B|12rxywe#h}-!Z19>Ol9p&`>f)T3!umPrMXl_gBLMLT)9?@oZk0 zYU-blnc(!3{zGkHF#Op_Mq}h|LBBUz&#fuphrMow56`OeihEHGo-Ct=;rd4>1Hx`O zRr74NL#h7Z>pHCEezkg<)Reg}qaJe0&$nGm0Z|t|JjH#oOm!yo7Qba_*3j<$uv@&7 zJmL$3?8#vaJi6*4$qAs_UBK`mgu5c_9t)*W)AxwHR_0FdJLGr)O;HpTzS9=|pC0*T6H7F0{TvfMoUkqQmYKS2fkfwY8^30m zzu@l8dVB@CFMM4J!Nr>t3{laK89tnc(=03Alo?UtG)oR2&UXtt4SP+^cE|(Y^LP_J zJeP2?tckQ+qs&OiZ6tOsx;d0w4{DYFn*Vw==kg#mIS49?seL~52qH2;D@zg3Xqx~4 z0O#Z4ogoOTuM+gG7E;1v7Sx&O9J#2S5Jn;os){Cffg_jmao#N?O>rcR=~p$J_g?H@%)I`9=Cj 
zyYcH8p);RK%-Y3YXE+pH;C&`ldGJj0dMSll6}xu)_Mi3m&`!%Tog3m8J2ggJKQwcT z>JE%HWuNTmc^jkOCi^ihv2VV8(xcrjrBF9PsX^j)xTT?dI2S(g1Z+$woU2$1%i1QW z=C!}IX!3H%Ty}7iT2e6K{3CRoeCYgcJRL}a!y5-5`V8F2bGn)7K_w|S#wWGg?f15! zJbLCm?y}|C8IoY=Gn&<26U*Y69yOY=+km~52dGs+zT+<_(4Ta|@D&Fe!%?`Kd&0TA z#225av8~TeF`P^FDbgYwqeWL6xBFWrCF*lr$85@PIn(K-$g_*W;7ZjbIwk%Zw!sB< zVYcW7whT*bdG2U$Rn0*@=aa-1&n1IOQn+HINNnTJ%#LW`a?+!C?Kj9w%3iKG-%C9{OUZK43gI1PJ}BrZtyQdg-|9waJoFX=_HId;~_8ELN%3S8^x3C zAzO+`mg2H!P>LdNy32!+d*OVIMX8TB-68ZGojU~m%N2K!%7LJ#&c{bp)*l(4WDCN3 z7BUp5eG)L$Z06OOm^>krr#zyi`G|Ol(QYRW;-SY?x@6+qo3f{gl(vT0jW((eHnxeu zuX31@GpAvDb)TGoz4ZJ8Q5(FtI>fd<>5J-llRakJm_iOb2p5J+5}a2PR#@IJc+W6! zuIeab=xT#6E$f+5jFxhYGJ+MqQAbF80wyHx8Lky9P*1rE{&lvgK!`t_=%Xt_J+0Al zFzfka_d>Y8sgDRM^3Q`v4Vw7{gZpnQ=muc7FXiN8a~7CO`@^cn4r z2!3H$vl+kn4k2*-ztqRv;Qcj(P=yT^&jh{6Q-wNK3-=yf`CN0nb0P6TX@rqmd*b}{ zN5kpd7)-~2PO?pI{Ncv9J(tj(QE|U&hvX+ z-QBTMFfK#afhorZG03Wn=ux+=b_-@y?ZWM~j3c--Ja<=rnX->6)EJ=^Z24u%SZCcH z)WqaZV9t;JHQrRB<6L?Q%O9x~Tpab~E1Sl13-qrUWDe|feo~#B?U{+EiDGcQDuxpWq%mo|a%^cM6JsWZ zl_rNVR&iq48&G;!*uETNO3AcKHN(ZnJT76_Iu;B!*Ox!hGyeC4_6~H8CWcC=glyfyGNKvleli z+#gP{KEGsWT7;q7J4@FF!^lCbcqk!I!DRVb{ll1<1}A7x%k0RG!Dy}Eo)bdiHmZvj zy0{0LGf%(<^1hsPs!x+Xf`2{Ca@9~0Le0c$V_9P{ge`gbJ~j=P0CxclZ44YzOO8ej z2csWvr-}rEAy{Ljds@LJdS7-NSZg7wFI` z%KVK2qzbWi>o)EHpFuB@ZJ^L|Z1HoJ>+p^F!?VrsFlSu(i(IHoL~{4@M(cOALy`bI zexRfr9$u^hy}n7nWc0TSS3LUCQiJ^Nt~=b*1G6GJw8Uz@;{q$_1o{d%C#iciS-AN> zIrD(4Q;t7Qi3jmn8+L;G55zBhN1v~kyn?&Q9Y}b;K}g*Fx)z(|9#EOQ_z|TXGhj!B zRtJ8aNFPm04xjiFj31nX>&Ew(U*~?__&3`LfM3G7iv9$Q+jfveSo}y#&=#TgG4Ah# z*A-qGN0U7*=?tbmunWw8sq5=+UfE#z9=7R!UtjXqH1v@c;Az@&z>N!RdEmcP8=I~< zh=p_2hM$1_<+(lU1)F*Qdw<_BLg{REsXx-`v$}O}b?KiDPn!HjygCtM+H3wB^nV*P^_b*iGvN=qa`H%r|NIU?>B(S1}}j(y+e41Ts9HJ z2(fUvxwocXbcRcfV(POhGyXT2@QIqjq0PC>x0?Yh-sLq&qz~=qigu|CiK$Q<;lnp| zT&a6L63itTbND2s0{)@sGh11erhqsRDK0FWD-u%MDi=^`pX6THh@#iIzO)~l@@9$lDK(ZAb>U6tiyhf8pLwhJ!wVbDSX{cF zhe^3Jr&u$CTW^;uH?}A_5Lxq`C~4wvh1C{0)8o?!w0GP->apy--%Js*OuFtElVpCD zgqvgcB5D+y0|6LqbgqQJF`HVqu_7|*h;!@}xfL-R;# 
zS>rJTuFIUeZBzG%aq@8PRrYTgK6|@9*7jgPcv^q5Ub`lOc zN+ve7CC4J?#ND>e2>5tJdsXFCc+-3-n4UDltvqEX3J}=d5d?7_ylozp0 zbyUBznXTlH?u1XTuMD3OlpMU9_w-=i5S!I((D1Z;sc6v{XZFFy^zqAs?kr-v+!?ID zEqt`a!Ij6z^9Wa%}4m~Q@It*{FBPK zDNAeDu+L0RXIIJwW`IYkB{!GftfqC-P3~-8@1t^50dJ}-9PVnI-iV*%;0`FSnXbR3 zm4Whlt4wmYbjr=#pDtUY%$nZpq{+9bbt=m^n4~H3dGt!(M-nIHz}r5>ibfwy0pZGs zteO(VIZ3&_NXje=5mv+E=J`}ihaCm)_64>j?vNZn8El<&!nS+_Q z9SBAtoKuohoyEeqPChM8aQ1A%b){A#>71Uj*_C|mv*gOQQ%HCnxQ=ruQFX3LIVVXF z$zce0aiiL|p_@<=)IN3?&aNX_-TZB~hl6({Xe7@+2VB1A)}_9ShNg0dH?IG9acE24 zT>8n=p|#BlS5w3SE8X75wyj5{=D+C`5uPM0zDt2q)10c`D1&%u8ZFl7HIi-EC#mYTveFuH_)NLEKps z+A%@POjJf!?p>7_`E>==@wyRhS7E=b5#?j!?otuM%AToSo&nb$g4|1yE$o z+gpo+TdU-xGSATuZKz>!LN zkv>)04986WdUr3+OfmSOUF4(0D&+O%TR-7jeK-)xsE94$T&Kki`d(gzy}Qq)SZ(gm z;4%{J?uR?r&J~wsJbGUPmRpBU$KbCx&^HqlES5v5!0OYhPrj#&k2mahphLcZ+@;8C z&984qaNs$XIb|3GhFAH-Ktf=1;7`2P3D^k-fRXLn?r)1o<{*er9E#^ntVGUollnQ zwMSXwn*_b@BKbG9g9aUA<9YLSR$0FZE>vZgQRDos2l~9%EOED+npqgDyGEb?nAPcp{p!{$9to7$f$M2$)_QB+Ym!i5;m8^GPMZGgKeZ$jxM^`}hSxtWv3=-V~o(sn5L zWuOj-UYCrL-dlF2R0>TRpxZg@W>+2U&I&i$>sAVv*d{ICwzp_3GMg>iv&}5%aKTh; zDVrDn{G(y=!YHbOpHvWMFxoSyTUf4deeSw5a4-B5>Jvf3x8+N{S}h(EE%5qb+f+K? 
zox>VQ2NL}zHRTJ0Z{YqOupuW)vK(5Jg8{+vS{ z9H+^AsBCRD603foO^+_qeScerQ4#wP{qwtLO&r|Z{S7cTOI4=8DCJq~Y9)$31(aNBG~+^q3+jzhd+VPhN+q@!W+0J_nq@P}V-G zoa7I9#uS+pxhJ%Nz0TOs>J7Rv8_xtkjtIr{&U$8g*whsohjV58q) z4CORU`H0Q-Gj4HYZOSir<7K;?HsLXm=O>6(b$4!d`FlfWNSJr_Zx}B@k5kbu%^FXf z+=^;0DRVtKCxxCoUc;Vh;p`O+LDeB)35gqn5Kt9L?i#|(B*0W20Lm3UZbBJysej_H zd1!v%mexVeqb$b;g_r0caLMg8BMwWw(oY!B91}C6<@~#y%y1vC9)cxZC!NRz2xWvEqc+Kv`KxEd|fo{zI`qb1SObbBe#Tb zDfItGlP%tPs(y0BoJ+Irta1#TS#y+JTQX9VVB^#xXQt`Rp3og`%e>Xhw(O`v%@|+} z|Eh1xeYI9cUj@O$%#OFG}EI%r0VbQ9ZZjx~Y>GGiMR7T+CB$61*ktXCV5^ltC z-YuGDl2)iGo28Gsk=eIMFl;D||c*q1aon4Q4nxrgz*C_QPY-4}A)5ub*CBljMLkydRQ8 zGD#a5d45FPAZo*VlQ{Zg=EX1zF2tLfgWP&=5+!r%+)@2pQ5lC=ikS*Htk(G__I5U)wK z6|>nkT#oyk3!K3q!<9{m0_(+fNz3`^gs)I{{By3?TXppTud$CigmA^*H zN)$6oKW0_zDo%JdxTj%$I>f}s$YC~y;HfqrZ`^h+Lm4^VDk9ii7#OuBRxY5PKb=e4 zg+ktY?MS%MFZF}IbBqRLTaQn!aZ-Ok@H%@y4zp1AUCzm>p&fljEJ}Fdf(g@3$;<&v z`;?1Up5ClSVXS_(-JmogudOkIrDP<99{1zcxK>=8J8vd->tIQ0lcxF^EXs0Cdc2P! zfzU>s&TH8KxS{fL;T^}4%Mhe>iYXIQ+nP?#;?Q2QSJ!}T1oOw66 zIA7U8xf+McA5Qn^8r}2G#%#rt{!5+E#p$_KInK^2Ry6tg6{nv5k>x`CwU|;R#Z0AP z%H7 zb>N4V)32rMH@`O`m_XEFlO0!R&-%^Yx#5^S;ECCjx$LVI-o>_I_|3-os*c(7xaf=5 zcX#7oInd85<9Ju(6#fyp%t-*g-}D5{V_!HI$EG)F7{c_;Uplv_YJDg4{{eI{FAA}o zd`IK1Kpmd}2fMGZI4(Jy_<@DtaDKLYb_1Kw+#yu2nRKAMZa+0MH{_=dTDT~}bw!%f zt0kh{4g*^9=11n5V)lKJpC~%_JU%SH5+IcRBY;K@i{H_KopQ#+YFkDy|#! 
zT*XtRdOz=(06BxDQ5!^BZUQKB1vgqh0?{sH4OnoT^9jN(UQJyeLGr6HXI%}7dx-xV zg?ieEY-8e?YJO$Qj*B&WF*w6|DOU$Gt3f{MC}F%gx^;TH7uaFn6&p z;}8mA+ohx^l$KyUe65AFqLfSw<-$`zzVpS{v2O$;4Sime+79{|GTOVQW;zyp(dOy2 zfmz3-Q@oq1bMD5nMYU1a?D2Kw;2;Hb&N&G$)u9S;*t-s zGYR?P>r!WEjIpBaP)d8%y4dYNKwO_SWdrvui*1&CH3fZ}D(JjJN^YkVhHr)0b_`D0 zW_w#STbJtd1``26(Ch!5sAHWdHD=zhuj!cWme;u3&;$bHo#7K%g@Zhk-j3?}D70lN z00r;Tf(TsFjN!B)hGNx?6e2Rd(ePt5aEHDl_&O;qr#1dDWJiC~UeHW5-ki)6QCOZg@Yx_Ze*|{5|I8PZzLZw^ zHUxA>vgEcIeish~W`w%<;b;2x79lY)dk{W*$79t-7{+LC^W1w3zt7h zBLFo?lycF_M;{tY^ll-QwQHL5bxX{iP_P-XD+h7UA~Qc>(gx@?ZOQNBLWxc%C;^J@ zFfT&6LWeB`vA=;~zCSd<;bElnyZVCkja{!d!eW96a(bR6c%XCWTjc4Y-_o|HE+@+u zPu5KoIV$ngiY;>_Mk|97UktA34Jx$LANS6qU_*sar01qwyIiD>5Y0dtpm9I`uo;(j z-Jh*2zN_bqyKD9)DCg-vjEvP|J{iqr4w&ahEDAhUn&__po2O$aCjSj(q)ku~ZYJspXT%67 z+c|1SHm!%y!-@@1Rg6`Dlba3gxxXE?*>5FcF*fZqqp=`|=7#U4ng zUXj^=_`SZ1@#a`n_gOotP}ha!8J)$$>n|<3%JbS%K?SfE5LXjdGNLoE6d^)-Fo zT2BO7)~GfqUUf;}`n14Fx%qqg=tmDT0Kt|apym~C0bjh9iLCZ3Yw(N9v&3IqFi16* zF3b&aC#hy+Lpns0f@{6ajMgb2B*=FGR zdV|d)D$!gU=WN$V09)lt|szi_zdt-eXeE;+GH=s>OhAiuA65l%=#?f%8xPsl>dx! 
z?!j&U2eYotmr_R--Ypnr`{+0Ov@3SD_7Cxq1VM5?cl7{Evs0w<4gkosp|Z0xe6y7P z2=%cINbigSc2LZ`y9{OepF9t0bUJw=zM4ZYV_@n$xshk=F?lo!w@=L|onO?K|0;tO zZOgpVe1CZyu(&=%M{pE$%0h(MYfLDZBs5sD722!`6{-M1gxdWWW6&B>4zCe|pRaqL zkqyKJy9cjT<=F~4NG?xX`iKuhBXphM|0 zFylf{rg|nT2cvx2csq7aD0c%jn9Z+iT8eYhsW=$dNq9!c>$J-}0cTYkUz6?I6=qZR z8iWCoWmr2bLT#bnZ<@Keze!GY63}B82XyJWQ?LY!ctHNnimF#DuKXRs`Itd0i+8$lwc@yIZ`SO zEsx-n9rxjr6!%VfWJDK_M+YSkCDM9$oPvigRc|oz$)4+iyD3q0={9eGTn)%tma4rV zv12MNN6V`@CP{2CF^>}c5d?p6t7UTO<4=G!G+ZL9fy}8A*Ky@GeweLxx_?z`8!k<* zv_g+xw4Fjs<&YyDDx)!SSo!0Rl>BX9iY%9XeX&s6@QJ$-*;@;1Ecl|l zyF@#qN9TnGJl}Oetmh5{N2&=V(_2o*^lN#&_$5Y*e*4dZER3;6vrKdB1hFEayH1Tu zbKFOzO+}(*EBy} zFD&W|02AvPqyR=2IZ>nVqLMFsIV&ot`Do*s>NInvE`y$Wo}oxRYrxrt}v8#8<=`NViJ0a zf%2Yp&rI|AjnKStjh6!XKW9&nSXrT$tK&j&?&joGu4n>kN_+3ry9YRxT|aS|xR6gX z@%wp88?Ho30cd0r#@LfF#~6?CRNG1#0ZyF=PLNeQ2L%7|maFWMNdQ6BT}5$Equt57 zurCm`pdP_daJQx&TizueUE6|aNXW1b-uh3kfsiGRw1)pu#R zFi5a(yT(pKvc_eC*oGl0pjb7qZR$Y#bb`h7DqIlMzp zg(K0UJ4TkW?st*t^1nfQAAv;Q>E6Z9K-{%q&Uy-5{l$OTdKKQ3*h`D4gz^^J4!o5?HpYdw)^`stc+&8o_9+w=sh2o7Tio} zog(iwm)lt;+QC=WW}Lk{dSU|WD!+eh07@)2 zPwbYybX$3-#I7v5zTfFNqO~Yin12Ta2{96&hgpeGNXXmR!zW$nd~G#*Ei^N`8>Ji^ zB1Z?H9-rQ75HYn>HU(kVM;&{p>#zM!{=quIFZo4J5T@FAVv&b1Xsy3NeYKmctvbkM zUnj*S!8nT~huuV`PY4PK98fiCaM5b;hFN(Jh%sAGAvki4G)e8H@T~9%T8w)VjTQIiQ^}B6sIU(T|aLWg_K08ki z94f)tr@FlIHxLqMf4nL>V4K?r0(R-|Kp!Ovc4y&Wfr|$=uc!Gyh?tg`5jrSCv2Je6 zL{yMBz|icBk_ZN?2sqhPRQ~vrzw_wP*L=MWjTcwngc2t!Vx854t><@Hl-#(r=O4#5 zBs_%OEr~*9{Bc^JB~h<{>6(4?Bjo>sxMslbmTP&VVMFsbY=!6gqf5oNkPaC00lG+H*Ol8m8hg6~q z_2t|K9GUMBgE@+fr$9P1Ta3!SsvD>Sg+BjN&Oy6-&0n6IIE7_#$-4l>V8m8Jjw2R# zm0j-SOe%fM+%W+bplW3sf=SBy+cMyFQAwcwYRctU=~3*4p5On+aGe4fTQr^ZOpb~U zjQncpjid6o-NtxHZ?a~F#GdjKFTI9ZWi6YGXxF3xWmBNQ>gcjZK9I03;X6r+ zQU(YJQfsVq3S+!rI5ppS3Sz<I-lG_;KtgM7$RHmM!f z{-QHq!IOM-N8h{?5qhU>U=YB+uC(iwF)xVX14Ex`Wt3uL#Kzy;JBFKoEKTBfK3lXf zKa<~B>tE0)=skQi%azaFIMd|sJdA`l)EKAqEfYre9U1b=7c&18V|Ck1-PP88y1+Q# zO3PXIX+}2N!q5!UjvO~_VNpuHf@3_K87|+(?Z+MQ|62_MVec8Bk6i=gGYKbYkzp!4 
z-PKlS50x!yU|uFRgSd3(%;k<&^yoCNBaeLc#2Io=zI#Xr(~{_|v1@mXtaV5?QL!N5 zcmc+w8V7G~c31rtwKfye;Z#{;58U>+)o7;K37{Ee0Pj}ftq}-sF@$gx)e1slj`l}w zA2kx)*uu6XJHH@Y*&P5=_T@9z&s!KC){={=Qey6rO={#8H0vPO%Bcp3!5vi5iTUGu zA%=bhmb3v?9vTB6eX#-w#S^v6xc=na<#JA{-ZMf?>B z3&uR4H@?nsxto9YG`8aC{bh0<2=@JGqPzqU;2PvlTF6?dt;x>bnY;+jBaA|uamasB zkjPWym*FyYEsEFZQD#;==7k`|K_Z{;psPSK)umMu|MZLo{xAB1p^i&&pR_V|Wo+op zP_=th+jThsXp$dQIvaE_*QZvaW?qu1CM-Deb&PU=7`=;i1cQ&@Pj<iw(U{r#>TT;=WIRYkq#}2(Y8pEY$g4> zWYw^Sf06fCk_Uz&zKOUsV!Tt8QuW#KtT;-_~M-7P0Ha1xC;5bjUIh6+Ryl*8~ zBxT#YMt7Z@0lv4oE)S}ACm|!H@|#k=icMD#FZd3CtJmTbltA<4mTrbHD)o8Vv!@%n z-)?x*42WV=c`90IQ(a}VMK<13f~1QpPs~1YlC0NsA^Ui0-lClKC!dh+1`#I>m*hLH z<>rMGPq%6}G>8gN65d{QN zfE;c)ASR}ig4d^*?$0|L6KA|k_qp&o{qe4J1qa?du)Nd=P-s6|NpNUE8qlBI`MFKu=v*BTrvR>XsX*bM3={Yo=r4f({K>xn z1y4e9kA+wUkJW#J`uBB$%S%m1e}8e|u{xj1{1l_JH#Nx;`dkL;7~Toq7W5jBf<&W0 zKx#O<@7E@!-<784Y(G{-n)iPxV5(3r`DNF&M9;poF2Vn<`(3G^@qxG*v73Y74>paC zTsu_TwK|kq;bl>xs$m3P)QO*Su;7P7<(k34^fN#FBP7Bwp2TetoNrxVKjSoC5uG$o zC6#@W9ON&vJswy$6gsq}v&jnPQZXU$zYy&Q8dwNeRx|>9-18-&NK63O!f}`83jbK? 
zp9s>!x5C3s8I`XVBe>@Vs4%i5tmtCdE>-TZa9CL56P6*Sjm1X=Wkrts<&~BF;8yob z&qoKnDv5^t5m}0Af-fysrzIe`j2{Qk$P-jSLFr{tsJ@7H%RZMklQe%BND_Bzo~Y0f z-;aq8x(gICEf-8v$}aOA7B{ba)QIUgFo9M^e~aWCd;~S=KQ1g49u0Rt*yit6a)~v%;NEw(*{!Ws(ehK#qr-lTN5k~o+hwY3nq+nK z-!w&0#;1nZ?-4Fja37h|E-$nOISRN7G*6cd4RFLcr^UtUM)H3c?az9&iTRUTC$#{ z%uYvTFnk+IZQyjrgWvs?^>%F$q@(s{n#k~I={4Jt_U%xtj zAKr=Z^QE4h`Y`ZlST7alBN0kiaD9Dts(yR%ad>H^B#wpsCB7;n`J zdzqZZ808F+u7(b*JDAN zOq43fN+#{vVpd{1rfX4g0e3Wf;z0)U^GpM|eS(D?s@G{!*}l~LK)5X3KAN})H+%{$ z#gE9bM;bP$jacA4<7;>W8Vbm|lt+EU)@kZ{8pvLhszyz>cOM<_Un8)%{XlC*((ZSE z${US+eaeN+_H=Lys|-JP z^t^SB*`d%pkFGB@Js88u=3YgEHc-ps9Fj1wD`Yo_?jt!~-iEJOe& z^Nf)IBKC|E6=N8xF3cNuq8&q4CAJnAR(=r5!I+Nc9g9D`2JHZC z+n{HD&hO7o^|gLtIb$tc=g&SLzx!yIIfL_w>l%~3z|bSnRy3RTgoR^w!oGq%I@MqSvOH!qVnjkNOI|@Z1T}Y zBG49uF-Wo)vRHbhTY=+vR4pYuv}5qO8MdKdKpKH^46YopquTrWWU*q(lifPRy6p=$ ze6Y&4JToYLbq0E}-j}LapqzN~;fDPVpa}II3KR=W9ZpA;RI?nREmP{qP~SxGfRA{KB_a7&zbG=Df0}%LH@>C543<+J-c4&(@*k+nW~&+CXc+qNaC@_vUuQ zo55(Gxih!RHqf{FlOJ?XA`4H@WS80rwB-&h0xdvh&;-ajpxRt1G!@^grda23wZ9iV zkj~tr>~FiI$o!_m4=IMh6%Os`2vqxm2_|g0qvT#ABPJ$ONf$p2{&OGpG=xm9oPm9i zalz-+;YGdJZNnP}u4SgfLGm5FX7YWJUNZwq@9*>Vonvr0`6&j}*F>*13!w17c&QPB zn7lx`39tfd1n>~GY16-tx_<0Gi|!lnf&7RjD>sX^)5;KgNCfKA?^c2ZQ0(S-qv+57 zxV*XFuWSE!(ZlcDH=y^wQwm@YZR7<2@h4~kXf(-N0gn8MB>XX~#>Wz1n>j3vcr{98v`N-!5&ZzcTwwFfJEfhaRm5C>~ z<5R;fW%@~iHI`(Z>7~5GB-h-pU4ZhHECd7x)GfhnsCe=ci7^HOZD_=6;a7)35RP>> z3;~b%zv}(8#+j_$YuLF?OoD2M9C7=rCc@tf~|t_&L6as^zR$Q5A3wh6K9Y9 zLX;25gh4#{>&o+{<9z2`m=A{-m*WzTM@kU|lqp~JdaEKf2P9T?;dWtp_!%|s3B^9@ zv*CNC?DPSd`O3eHizjrfCq0^#{vx1v%o(W!!rMcCi5-R+5MJ#fYp0QIJMj*J8g=H+Sv&Rf#4(T43~ToK zTGFPwSi~sz>Ru0n-4Th&hs=bbQ*C8RccvN?1#Xfo65%W9k#51);e=SN5$YUV@xJg8 zlkhQ6&AlD1^ge_NYPV$;Yqw_vy=)QJLMK%b<495mC{Jfd&f23L)Ik=@!E&x0*!{N_ zBT%%iBj17@wWEqDz+S{xy@;-P@{RjP-hsG(Z}wi|x3cY(Nw5KV`X4J3@SmV6oXKao zYW#A|o|D6|@oBvAvSR#W$&ve9ycar-$3((|+t9+wv*f?HE9ea)J^>AwcKmdsZVNhA*KMCj!Y z&?_u(PF_RZjKRM`nI4^IEI}y^D7NY86U2o2v}9P8dd|#037-HX;>0KpNmkknPG;SiPg_heW>!=&R*h2r5(< 
zAhqn>a@xm7t#(@Fnd}azO1BrRwmxSRhEkJe&mcx!(8ydCI?{x5%&?(%y3*-vUOwMKFFxbmYyIs#lf3edB zO?Be@1`jmp1`=Ql@D;^QUD$@9R@zJ_JEU(I?VIF+NQbY@c={&ezNE#u;!u_X2u|C) zJw6R^4J}nnId-wrQN;|JI(RIU@w9}pL6Xme~svs?DUb$qFMP9dn~M~-{m z(<&s-j^^ME7z4V-p*rxMw>m^YUMIJ@#4EKp>j`UaF1NRR9vXISzaTpab##8Sfi}mX zh_a%(tn8f#W&aOTvZ&;>EJ6fWL=^@UQ(xxetW(xtFyE}VO$Rx825PG z?md{*D6<_j+3A6L4ZTvj)X=oWSqvN^t5`MgUg%93LaBp+ZQ~Csth0qBvN)llaBjcZ zp|W4dewokX(DMlSTW#GE2ij{C+?r4-r0$?0d&585=I*+XH<<9om9*hQo9rq<$qj8v z;RWoK&JZ>f?j%9AUr^;X-`<^Y)uP$Cv@VErnn*Iqx+;ppJrM*myyDol1E3))4kafA zRH4;5d(!vE1e4OZD8|^2arlHoxgw7 zF2GH_;J0i97IchggV3+u-HC4Dta>_!trpn7utX9Z_uF;CBTy3Q1H-v$j2RS%eexQ_O1+giCwZOJY24XO>W-7tWe*WQapm{_n*vGO9fWq5ru2JadnpK)yU+C zUn*4f*<3VT?7yT<(RAB?p)O23hzNjScgk~$^q-_Hp%^bOTr+F&s0=Tt+QqMKakH2` zq473Ue7%a3%M}kN9&l~CJZ)6fYz0pFIO9^%_9b1ICd1JYfMGOiC0vfZU9F&9Ns0S{ zhCLV2u%C6_x|2VDLQco0&FGxY`aV+1KcD7}Hdy)5-bc*inO))VB&LDfC0wN_rhg z=E46~@I|E;aY0$g)sA43D%EBIWF1yG7(Lk%a0YyQ{ypRN<1R~nN0<-)rNa``@e9fY zto1?-i_{~~lc@YiPx7O`gr-7-Dm1+|eUE3YUMoPa28OOR3m~h225V5A(P0+2r0#5# zq}+Z3aBv{HT6OB5!d4Jh&l}{J#lbJBL6t8%af$9-cZT0eQ!+g^UN~GU$S&YFJ2E_0 zrt-^)w z2|R=Ry}8l*0_MZ7U5zkAz_09v_v1(3;zk*YAP!LTZpvr(P z!g*a}P)hz4`hgUCvs8G4V!+Rle=9UG7r&+Ba$cmpd-_|;fy}6-3Ar=glPE)=`;fo_ zA?NwJM7ZG)rxl%U$0H<21-QhiNPcx~|y8O1Fx|T9QP1<2}qBVML8CkV&is zuU6C%t)cNC1C2)+K&x&y?2qOv+j0tV{0F8c&r_`Tzmr$b1Ou5%W^6n^3;XNBWB#11X>%+rJGy!srCe z(YbEDZOgwO*v*UBk2)R>AVtwsv_^*}-!hZSoE5(aM&wbZp z45-;4F9Osnh2je>;o~cmBf7}rYtmFJlr#|tAZ_hRsecYsBF@%?h((VU5(Y<}Nlt%! 
z*_)Ie+K&b_NCOdp8Jq3;zez^o90-89H_S93WL^!hOMMZe?}FydR%B3Is&S$Tsa0yW zDbDhZ$Lxz#RY6{kD}=Gf>|@@aKR5dCG>*MP>bs|3ngcpoB97`WvN(pI=~>sDs0({+ z>LF_E@|4+X3VOwM`63~^Vh*Zg`A_ii{lr=E16|%K3D%GDI4joXA96Iy#6LeM6#PG) z`~MGq?OeE#-zjOsz6bmKVRIUGj>=sR-52I2U2w^4t~m9n_czV^6^64+@HaqB!_XLB*gZ^^!yqlNQHs+m*3c!a}_5*rYmS#4HKR(m9`~g zM2$-xk}WoY-YBvOnpRLTqkysMCVqMLTw2s^82>@vtSUt;+y;eevpo1RWi7C8DYSN_ zf;>-AU!KzzsJ&Xe*IYu&oL4XCXR+t4>gOHSphXHs)eR~t=?2&yhFi$5ZQ#`LU8~pvceJ&FhCfxcO3{NjN(F>DhM(H z1VR{LRFI^KfNV$z6rzL`hB0B2_qqdGi=h5K&+|SX|1@OW>$=YKJdW>i(va#YDI^w~ z^%J80MI}FuB^NYxvim^PDmDd2L%%q9|0qd8YpxQ5x3``q_+DR(DXVZ!H~L$1EY~0$ z^ujbN2D#n1<$|8=tx<-F3G!Pm)^d7cP}8N~K0hEB^kf!MFj%a{yr-S7UrN9%(x%!j ze>2f?@-_xD1p!s!jBL*Cdh>c>4{5zT?l3soPAs% zRCBD9ji{p#a0JlgGkNid`dj%(S#WHXbZ$wVY=5y^&@oeAR5I#sPz?^#N5kzJ=TQSn zuPgg!<${iTgrM?g@fd#%uvO18 z{!h_nSZ$aNBooRZC2z`7fW0jwhXL+Qz$pvT!0mbwFPuEc{$ z7DvbAPi7ZiWJ7I)7zO7hJAyc-(cm%zbQ4cs>j1n*rKF%?6;v_sS%$#l8xJ~byEB2x zb0X-UdvysidE*uAopsKt)3=<7jrEO|^N3tD9lN*MJSqO+#IyL|qKE>#*dIF1^k7q7_nn!i23JU=8ZjY z#c;a0kIPdl)%G}_**CAR?ezk)r%F({0VN8g;vZAlqy#R(!A#%?r7Jk(K!BE`{V!{B zD6g~r%$k*M927?rNHp{bd?IVIE)@tS`S#FV&l&p0I$31u0uQBgvt>2E1D(gT?tE~* znhUbrP>H+2Lj%X`E*mZNanZ2|c0dmnc#pcmql=2WDVXxwjzP-w5U&7n)Jp;x)C&hj zWUZNW=b98Q+06|7GT91G3X%{rJPSZAAKmkFY!)b{?TNb12jzLP_DA1SmW|G|2Q$u} zG+Q6vyKX@i)fR9D_}0Hg`fPInb$h_?l-c=W`qb$CGQ~{#%@kf+xwSe@R|OHgYO|z* z(VjY;pWBk}<3+KmW8P2eip9}24**bxMwdNDcav`uchUgJy>aE}Z!*TEqbJgfG3taR zVsQHA(#Xsyk+Sl=?S}?0h$996$muzr^EjK%Vl^(4T6{emClZQZi=)Tyw~w|BgCV3C zj8+Ix-gm744)omWVp$izq`qx8Qr4Bwlo@x%rtpMU@~Qi``PG0uTWMJ5M#89bNr3 zpo!eBHp+Yv{3{w)4ZbQ5nI3#rb?Tux$}eX}e#!0Lkkp$dV$_byA+QVUUIIXe9hpcn zrg_@D2HW%(I;1v>p8?6tp7511?WVcoxT*c+j*dK}nnaQE)2y}n8az(v$P-*?!{#hj z8m-T7+H^zoLx&~N+UC4W-Vi2I4SV`w z5x!4504;pgheMVmuG^IHNgxKt!TrXzO93%Bd=mTnJs^sUjoC?TlcJrsQ2})DiZc0O zuu%H~hH|Finw=J%F>T21^WQc#3lzl#y)S?$ybBVAf8(sK%zZmD1uzj$5vX?5^N3w- zFY1gn9m#m~US$Crq`9!29o6ocA9@(qRX7`gRZU)N{Rf}VUSqw81{`obHCbS6;}PeH zS))q`DfdDlr|Ex3d0@9`8A{o?+}rqQKm_TI*Z4F@C8r4{t-Ncu1u<;MLiyjj?-LWo 
zr)%scfx2j`(3vkiP7m39`ZY!1QFPrwg7JT-F+Ek z%v;kY@(zU%O{`DyRpee6|61~i|9ZiYurtJZfHgUnXz576gXjpIXR6{O9)c2lKT~Nf zM4**Wtd7|h4oBS+bZ&Z-!5A6^@Y7N|6E#pSh^Be!QW35xUZ-5EO<|d;0QGTn445ZB z=RC^*0$4#QQK_Tww&S%~nlaSuHtJioBI&9OkItBU8r8$WHTZ0-%oStsAqGUf*lnqY zZ~I>eatV}G7Vy9)OV}EI+yz_xtL}Axc=c9O4)vihaoDn8PCc+OsYR`2%7HViQ`X|i zvnQTN%YB&FkXU3`aTH?ff9{LJ`4Hf+-0%MFR)*VeJq=#2kh_?05Ic@(rFunTA%V=< zzD(ziDnX$>f1y-!SF5#@P^&M{SnPG6U zgjH-CwFfY+t`f{oDR~ERJto~jWhD?(O8f^r3+zU6&?=fCLBfGNkXd+jgK+hsInxTA z@UZ|1bl6$0xGgsUMT3-NbjV)tTFseH53^EE61`CIzyQ?}6Mwz2OR8TH z0P(?A4Q)uhBo8MR0{1%xmtxIs)fTV=jeGu&J1{E>l$A~gVEvn?8~u6i?^EBn2p_Ef zI!)9!G6poXx+^@S8#a=?s-?)uG3g(x-+pcQ6ah0He;i~y~Z@}N5pX-9)5GGUxK-{dWk5} z?h+#%aPQKI`mtea+2>aO0MG!=Q=n3)pon;K7PU_%fdD@(Bb26=UR9zi7#kZstw$?X zv!ya`*EPD*&STne#0N^o9_@s5dD9HWxt)eS(op8d>dx(I#murs=`NIUv4%Wp^+gBT zyq3O$fy<4Q&%O1RYIf#z=Rwi@CHW0+HNYWR+jzQs=^~@B8}wG>eE)ZwSI`I~VDtqj z0oTxFt=V2%>2%3jS7~UxeR;Uf+G`NEnYo;jtr@rp^FD2#b%)MTQ%i;S68j#V?Y~`Ma33bctyJDwWN1C?XVP6WFm0u zKs_y=z%dBdp`735(IP*EY4B$VTmzmdxJ#$U(W4G}JJQOKJm7YkVnKKhmq5r9C6apb zcIQK2A8nWd+!gaTBwSclIKnY3z|!xY8%`l zdd0N+Ebkae*kiNN{dSaKIO$wNCbN#1_Xh4I^ufqw^;Ya%ZTa6MA84A#Ez<&5jXwNH zr*1|K(QoL3!X^*?y=>l97cuaBEuJTiFx$}()+z;&?}yc%7b=NEaPU<-=atx`HOy9t zDD|=*5togdp6NGZZz6KrI$hn8bE$Q{ea1!~W-qLs-9pYa=xeX?iUSWAB+3~|LfYo*y&RJfAazTYC7x>=e&JVEC(j9+bl^t&{N-yL2xpAT0 zbIc&&U_EoY3Ym9uV72bfA3UT!ZplDh3BVg!4P*d9QX#OopM4@pOK z)DBPiEz#V%VFb;J?tyfd(Pj5ZAEr9=2D@7>6f1ACECEQ6BavL1CR1N~_y z)B0D4Ill}ntScL64DQQt#lu^H)o-e}b#4G}gtREx4CglkEWVc(Q;aE0c2u+AK=E;y zNi=Hu@EYWmbw3AlkhT=ZyJg$DOL zPU)4H=|-1i0tLgb({o1tGHZDm*EzBJ&gr)!BjDE%PvjlV(%Xe0d_BRFKwy|JOKaz_ zDSqZI<@ybVA9;Ac@Zw(<6Lf=WV+{+_ItLhBHmeco?`|!q+w|Wj0>@hfZoDtMyZ`sM zUbZa>Yy6+smKM6VbBLLRGn-U5Tx~Qo&@@`O`t9!rk9E!!+ShkzT)`dqG6n6IF52Mg z5ru@dpWki+WPu~7x;59SqaIA`rr+efK3UXyxYXq$TFn2<=Dvh#hYf#8a{MH~b)?mj z_hn_@&EC*okv|~>WUS6gA8NcSuh8Q^tw}x9v?3bVA;hW2JnSx6h1Tj(SDo_pt1@(GjyUe`|HR8BkYa~qy_dT;zeVFcek>gg<^haYx>VEECE zr$I#8V?7!M^6nOmoU!RC?r}_Wkn7wdX#-Ow74-2)O0Hfq$r2{ajtKJjv$)04A#uzH&+iz(;f7l0Kjg29Y{G 
zaO*EZORU}-KZXr*+zyaJV%u)?_y)A#L1b>#AdZH`pZ+kYMjt%Kk~_K(!ITbHvc0gK z1nely!S2$*9bZ&pacXL?^ksy#6c${J^V;9vJSYvY63U@wM{W;Oaxs*SOd55_nIuR> zB3=Szoj{!T3n^O68ob4KKKl2R7jR^5Z;9Cwy(MOfp|=U1B`-*(>vyYG zVMm@F9W~Hu@!`6!CLwguo}z1RAG{7m!e=dv#7B021PW{4ygw!;pyvp*0;zZWy8`9p z<^M0}h_$LGojQv@i|;?C6$kIXY@ERuJoh$AyGhA6*DFP8{?-31tNSxB!Rdp##HWk|pj6wuSIi&VRpl}p zxo`h|=b9Z2NY9HP4XC6KlCN=s_8>>Bew}mNI~(lkZO<5>YC}pz2m&0jh+avHT*d?d zarbgvq^JS3$_XI%2P`nC9H`0X#0D~+7Q60`s-jCN6hc?Pn1<)*VW`cwsw%7tX+4>~ zn9o+B@Y;Kq~;|b*`all^D*3RD!eBf#yo#gN$drs(eQUh&ELW*HlBtc-6>>AVyT~e` zT{8%IbG4tS-K!lmX180Xpj`veh29&7t2X-Q)HhjGy&+u}O^b;hzb}Rg@Pti>$P4PQ zsfV4@CK9c+)CaRr;Wd!IR*Yn$_}9{e;q@9|^xUEi3y}=+sea=>dLms!c$HjP6afQi zzOqL$z%{85eS5iQyJ4?))XX?J+o`%xH<2sr2weEc8l#R3qHfNCz)^JIHE{luYeJW3 zB$#QduFx?PAv-6YjV%))z7v>#vLDaKhnAU71q+|{E^(24PD;iKGKx|~b~LKmwTjP; z=#%SZ14C8&uW_UBZhU^lIOqJu^B(beOFtRf%*0nGsrW?AkHf?*{Q5iUl<|ftTE^P2 zh&W>)(sK$haS=weN;*^1uZ#sd=81;Rkp9V@(pGnn3>yBcSw6DVTI-_Z!sJlUt?0v3 z+UTVK%1C~>vxzTB6gPaj)Fb~vLF4GLiQdK6H4i4+%MRNt5f1k^S*ZoYkC*UR3L(VQ zd?kDQYfg8K=^gH`3sv0CL%Fc87HS<-x35I)q9d`9(OcpuAnif^Ul!N@TFu*556iS@ z&AzH}CnE#H41xx(_D>NFgiAo0?b!z%1CAvnF#X*i_(@d5XA`=TyKwM0=rhJK-Pk^T z?oT$F^Yt6~4gnrshRz0@TF3(Rn@HWn-bXp5fwkP^Ft;E(pVBwa`)!fKB7ZH8|9li) z*B(2rJjefZiUC=f^R3nV6r+4V>>)6c!7eH__I{@}?L*{8D8;hyPSedL<{cQtaNGPg z&saH;O1tSAdU#EffnYSYTFYC~qKa|4bpGjs$yYi#ogQy5Uh9~~bsU7W{(bMWmf412 z#t9hMBRr>Nj@;3CDP{NkTpJ*juI@^xSD_89C5mKvWvM%0-5*30uI@^7xdlXGlfQYkTBN0l1qP%XNX{d@ZqApYzu+l=9r5K$ODaNQ@vPw-ytcIQ=$%BD3M@^K29De zma$e0$fQrl0#iCV1(1z+!!#YEtU@oHo&bH@4<9Xkam!sW+oI@tFv>qo`c2w+=Qd|* z{S}8?ovsza8IoOCJUpv9mw+SqnG1RWa;nY+k)r2iyY<0HWc`Qy(6M;|-&6n#Ztj)` zd|Pc;$9T|U56$QKQBn^G+4XG59Vl|6%bL*BJhos#5{c17db|90&%ndiW8nqTU^h~| zYRuC`rsYoXuXNrRL9y#f<0&_f((au~zil)Qz0~N#MH4it9$m2u3U>xX+4TQftGPgr zO*T;>hU4~jUvXBiYEglUdQXei+n4fn=9@?5HrSQ)Eg29Wf+TWpVs>wUY3jBd%)8YE z<>$o6*wxwFp(+aW4I7+&%Sp5sOY_yK`-XLenik1@qH&9-;AKy~3)L8>-w0hC-Dv0g zmG%laFrN(;^imb;tFs1U3JON_7z3u`VOdc%;!hSU)$~dV=X}mqXtc+M6p% zCJT;w+9|~kyz&6D4PCjRXX<>H%_L{r-6>0Cs 
z$BC|hm~Wp!-|{4H>OQz|iWH(K@TVpqdycuSgv2{G0ZqTNIMNIom1YOgg+FG*y^BIz z7ZUf;W##!&oStn&6i98FlL&n?Fd#So!n-qAZ)U_~`tR{pUXWHM(URedPLK;-I<0Dx z)7*8BoDV!3$Nqc0Ti;v0-dn`c;xP|xg=J;@2nR9GA=zb8WQw{!ssJFE2f7j(4ee9+ zQ|~6Y7X2ycW&kYbKw!xc*RSsM&g%i^H?BT;)gCIT1H@WwE#BOE#m=Qfp5*#qB~83j zJQLyh)JGj>d$B@BnLBL_#qh@u5P-`8zXYQPS{?tMPEP;&#ZmYX;ZtiYb}Q}}GDs`= z_wf%Jm3E-`MyW@rs;(_o_;@c0-Wv*A{OEK5fdUc)&>;a7=-;R3N|NHLZWhG)fgHiM ziC`D}ep|ya&pg(J-PTx`tnL55k_=v_{nrXuXOi5v&}~46VqLmeULu$Tb7BF;$O;8nT2_E_YYE%M#f{?uqm>QfV>m$<$`43m zgNd$KD1VPWvPCMnQLBG%XoJIVYT)}2QjjOes;jyjCY9t53eI}?AI^60gL zx|ex6+~P>Q2AhDd#n3|4UZ}mTaergq_XD|6%XRF#Z)bsbP-`scubqH}zO-w__I=8< zFUNZNPC-*#yaUl>3Q*F2IF_g(DP`)N%!Aypth znNVRzLDtOsXh~NDF-MgGpKR=&vQP1?wXD+!Y}C&8OE#z)Gq<@aX zwAr~3@{iYbE?D|VLy3d;*}Gae<$Z&4cfo=J!gE&(K=0LmeNo~AXzXYK{;WhAYp=Q8 zr@TI`&Os{KR?D^cnLTn-T~qo(W+fc_lk$K}0v+)G$*w zr>td`&z0(LHFLw{8q!BgQ5q!4Z0i6?cD%4rLlA{ttvar6(vamI0F2k+zIk{(zb7aw z{@xj!H_EPJJbL1X@T9qC1F9^+y`><)ZX!%%S{q$TRp!a_7-}zvWUF1Xe<=_tWx2sL z;gB<1FDsP{qNZWgv_A7i!6Yl;rtw#L6OHW&d{zeKuY_{g=DotblXs@C)2b);)H$bZ z$g#WF5HPMO6myL}tyX55CXNwl?YLTb``z8N6soWc8gA7GRMa!O_H$=)n1romD?q}p z6h*cjYe~Xc%Z~ey)<6=jb-U`PMxq(|m0BE|?c=P!%BUfkVOI^x9i&?_+$3@jr~_TE z#8~FOfSG|N{;8K=kAMspix(ilF)~%T?bT``ky%dhJ|4(BAk;11T#vwEr`Ske_(0-`*==#xPL2=D1(SlK;K)7|WoqM3;&uPjz!PJ6C&2 zY7ofMiX?`&IGPeBNIVmq%9W&CLf-}i$HfNa1XGwmaPo@}onJwX>(Qb0Q>V=5rFw?T zq}+Uy#(i7rta&|+L$xQ`Mk9x^VAjZ}!Dl+8#n{UIFM2M_W2#kJDS4xA#bnc;V~BU^ zOi`8aR9joYORclsDs(9B(m~9}#)Fuf5(>ftpnE1Wb_9Xj;FLc zNxhXy8tmm!i^i705Fnk@l1F3jSn#Vo{e1beH!;*V#%zE~i zNkCbUa77A;y&cd$HJ{YgL&;Hgo4Z%2tXz4sgOKUwn@il1H!tju*vj8J+v-j0_=(tS zk!~~;HDTOt{*zes+dBjF-+{*1*|}LHj{*X~i9mAOH{n0jB7S;w2Ww{;zkzShM4Jb& zzgI+-s!s!fKr&WYnFkZVg_I!sb1{mLg52iXPI{@fhgQ{ zOfA_>UD?VPKp@J6I<>TopLl_FhpTBZjTWx$KdNq%BSN#^4RYdxt8yCbQVl_9j%9#R2IP@WLyO6W||Btk-(Fey|yWW@-)|@21t5Q_%)? 
znJGo5ogVB@QBY3lTdoPft-o9olzC_Rs2jJ-^T!}zm-#bKel!b!OsuE~Q_weFK1=V| zg9608?Df4~}5H92_`qdAz?`E_>mks73 z%aZ5MO&Hf9Ua!`gec{W77adzbon*Pfn~s;W78MN`dN8nFJkkpF>CmFi(nQEK45Cmo z@#P`0{cz$z6Sk>Uzf=ht;HmqM)l7%5$ebO&@?f~OuSw~sShXnd=A*htf5gZDUHZRq zNDu(v{Rm}ME&fh^Ve1Q8_;S#nsf#1$$NbYNK{u!3e3}RL)QP`(XC9P&E4>_fASh(V z8B!A-IR$E15dp}UsQhpIx?hU==t)3)TEtd2+>WUPN$mvOJ&KjwQ{2a(gLk@yF`xpQ z*IDd+sCz6Ond%h0%*K%^i4aFWydW&aROODH3-R;2a8`bQGVzd;uoKv7UVx$*^=Qw~ zd!{W^l#Ce!pj~`)An`gpdYBckYmKaul^=@?)PPt|P$D^qm3atl`?n-kgyv%s{a&54 zo-KC=`!~mFda+~BTeljJIlPVEY7RYs=?LlBX#Krd7ya*oA*y!5zN$V@k5)hKGhbp_ zvv2E?4*}*bq|36GO_DmB;hW?G0+F0jEJ_8GOdmdN{nS&KM+LlcBkg4`hf!=03%%*p4S zRlimIqa(*%ix=Z-TzYCybA}eQt6~a0dQG5ehR-H3c`#g4AW2)fPpxy&MLIXdYsxw{ zI(VF6wL>|eZ_K@8qU3en4bjFVsFd^3gsIxAa zkP(rr&(VibO%YqqqN32-_JIAUYO+cdz8Iuc|E=E5?4D5^ajHmRH^+vaeSu`8`*sXA zUM>OG6zCVNic(p*nvzAED#yiY7FykYFn9HJR|Ukb?Uxygcwzn+(>JtkkScf}`>%i( z{-Ily`&hpN)|VTgi_Xuu&pd3pF2YjVBOE*)ANg1Le1#|Kp9BdxOrv;F>5p3d5oAmL4iqV-{Zrbls z%xu^O_Jv}b^tm4UNRuTr^@Wd0;I&{)id9ZB0a_`UE-nl3x+nY?8R!CnVXjA%UMvGGa+l#WLu4Jh z=M=4lZK*VTbO|G3T8^tM0{O8IXhmNI(Vv~#WobLM^&J<=uO088(Er&}X9vsB6jfY# zD1ifuOO+4!{g1ampgEyvTZZ|B5r$}il z!f$3D)H5?XBWp%ay>8u2FFueY`*E`mk|ctob5$bFXJqB622x|a;1lUa*P?14=`^WE zW!P=sTRfx~MaYkH~I0FO8( z0L>I}kX>sn)cDHLjG+8`C_!jI#2TER)yp6Bgy*OebVyl8LRI}1riir)O&UdmJAZ$2 zq=fu^5ZaG$bEZTy!LeiIRkMe)o1 z#=q-5a0kh0KH}QETt+wvjukOCp*Ce>V)f6tL9d!^|AXFnfFVir=6RFf_bQXEP*6C= zI{2Nn1vu{&53W<~;umDg6NWX)f~Ei4q&)mM?6qoJh7(6w2kFlL2W8ze$ikTE@fiDf zhjx7S27n1{D^%OA=h=@lJk^fVC$?CWMdT>m32U}cMgMQYI*ymgbJu?32WPex8k&NT z=l7HG;uwA3u{&9d&p}4UI>epTzKxb_$OHMK8NWV^@__Wt#N=tdK@ZBj$fU=k@X5H> z9r53Ff_CsM9n&_g3^a_NC#XIgzYEqYQ!rB7bg^V}_K#UVo8c;Ne$3hNkuAdq!;q+w zdR2n{hxEVCY1NiRlBP#U4a&0BH#bPGSKq9 zcfJF#?=Fb+F1)v1^_rUQF^hda4tW?X1jl>xq6U609g}z~=5To6X z2p3&IhD9s$8Hj>t?XmYHoBaEGgt&fn@A9xgAHtvYVQb6Fc zJV~39LAx~Cram?+W#_fv#m!Rpf(zMh`@ACZ9u&1ALmj%74umy)(15Qni< zGpYo>M?6vBj!gdAvC*qHDPfhM8_^S8{BB}4 z&dm98d0oRz5J7#UP5cyB3??!r`B2(m!3Xv6Jrp{*Bh$N%Y}?9oN&&#SPZ!7^oU{+= 
z{@cc(+8FdyPxIFB`2jTAf1d;INyF|J#?nBq%-42jzABJA=`7{c7rBbp5%oi{M^?9?rzo&!BSo)7u z3ysHvW1Kmd-792CpS;Z}r}Eg(t~8?ga_{p3zr=e)Sf1WWaEQn_!*{WA4OHEn-*<+e z(A2L@yW+TzB$m9^;IAgf=bC}-M$-!cw3s2o! z^~kZ5BfG8VNdnh-gD)l)cnup2lrBPm>Z!?EYR3~e1m+khDTo{`vjy32B@EVUR;SLK`3m(8A zQ;s!?Yynvb0Z9^aaG0Z!(!^Cq-mtwsJF7De-qD3|P?fmk5!yj$6IiP2+^@)Uo1c$f zrl_6}kfi09;dvJ^R!sGc%p&n0Yd+U-8&wK7H`@W}-*PJWjR@&9$@-26C~k4;PxB0cu`4Vbvt3j$@ z2qUq!Mn?c?IcC>9eLFuWM0L6c02&R43Q@Bfw)SWWk&%n&%EP zAB@p5)?IA0Io!V+AH$oUA3QarHRaQgPrKu)vVDo!^TvOy%Nq>I`IdL45Q^YV6~)6v zP+aDaK8QU3ke^4q5WBnBTG+sf&I3$nE2ANjP3V) zNaRTwEM$|8IQ1$wLPf=KB+6b8!A!Cp1M0p9Hcgs+XBI>h-F$m;x1_OyfL4>fxNWzCAmY&>y0x$> zX;XP6#X}GgT5>@$2~Qs~7o`nizyfrk{q9YiD0hDO;ptMD@EU8;3ZIxjeIOs3vMd*H zQ4#blt{%jA-_jWZs`eRn&}jVN4T zvYvKY%|97qDYguLohv>OUxnq0^NG}(aA}A>z8figp4ru#GV|P$5og@=8X!~-DyhWk zbO0~fNHSKPaQ{SlG6%a$)(13J6#({rbS3Rj+7&x9Mf&y=b}}x;(YFLaI>h)baFaLr zeu=K@-Q!TRwM0hQdiFUj+_Q;NxH%V$%J_WT$`SLJsr+C%7g48Iv4xb3^X>JY?4or$ zHD(+dzkh@@L%(jv?9Ss?Zq>~x$SD!U!OpQK)ehUdLQ;3T30`8FrV0tg$|TZK_08vf zh(^1S8q?bOy(#mj;39`p?Q>w)m8u8lm|1Dg=M*~x>$RLqfR5+gDzNCLv^>&niSq%? 
z(L;Box$a<=b2{6n1M;L_o)Ah~Y(;|HvPRXTCOgxpna%~B4_Zl|a>!w>g5JoYQ%Q*} zOZ?wu4iI((2@20vV(jG$$E zOoc?~0gKeyy*hf{xP#`Kzr9h_=T%atE3`M`$KG_f_XUSLP7j;R%e0K3e-CwUJMGu$ z+_6*=Cl_cH+NtW5lDh7kJw^o4rg)E+Fv2KqV%$rU=L>lg+z zo{xri?OnGYvSUS*b&~^FCJ$qe!m}bC>)hoRw&N3_$|LTtI0gaSW!b$!LIe+&+qUk@ zUc{pPWmatlKHac@hy*~Kzw#Rp&>Shz z1$3GqQpqZr)jL1p$A6Lmx)NgRHYVjkFddL_y?Z!t+9G_2%%2wf|Cu6+@iu8NUL-hf z_-@-`O}g{qmt>vkR1+s14R?5ju;=>(*fZaqf9XGxl^GfO=Z1YlL-6W}s(HvX_!2S} z5rn>MgPIO7C}~V^szpCvX0D@SZq{;#kw{slNVW)uub4xQ-*j`?P}?v9$`R zv#Gl*rjG6E@u*2)w;u)*MLL9!p+D_JTICkocV{q&CqGf=h}gnslB%c7>+ z4cH}qvAM$-(n)>JM5yx=blcIJK+Eg#JEIY{zuXxhuN;<#GsB*9>KYn1xbHfYuk1bB z0r|0*jB$xky(aZ?L?`;4E^BlisE{&0OygKJ1%Ky33-x0aYk$ws$7>1u)evhSG+%{d z?Uage<(t^P9Wh$dC%c0Khc~J$1=Y+g{lm+atnjRy4iAeD*^4*e;|u_{f;0~Yv4DfB z%r_I)yp<_7E=#n^f9h;97xnAMCBRD@ZTiyWxS9f&P=H4$vw765%z370+C@+w2y7sU z*5V(j*D7Nxei8O((Q0RNni$tmRs6jRG2D?#8q@xArkc|W?u`kxWekrAyH`; zvj?B+Sm?C4j-pCBgJoJi@b0g7j6mSsO}TmFuGfDR)jW)4OTv8AjnefxGmFJ4CaE?h zSL09Zux?jvNeOu^Vt3}vmBjJyGc7#=cl6R9w+uOXFeCPaT7BCfG7a-?k!MznPDqK* zfPt6cw=LVWy_~yd76YS23~6(hEn|nw9vAE8c&g2ls(CYmX2?*QKw}Ku@eeE$wCJE1 zb?(m8GfVfdhzMz}D9Qk$(My&Sph{H3#(ZR~SS|2aDWb`aK*vQ13+UMx+O5IIjm0a-a(VBx2R+bvoAbKIPtapaRtod7%xZU^ai2h4UUikLLvT?`$sMsaF ztv?%-&*ay>pIXthCc{2QtNqa|Skq>iLa>iBJnLWw6XCBR!H)V;Pv+^8KWs!)O znY)Z?4Q1sRJjpG5QQy)tMs6=FmPS`LCKli$yrJoWCV!HEHN)2?zMUQXYoMA-z8Xb!Haq{K?r+!k?p6K+t@0kfG$7_*54dvttF*39=e@0X z_~@R^?V&;dDmrf3cJ7}Zn^Z%9i0t}$7-16R$YjOxU-q0LhHmG9jkfBfYmAIFh4YU2 zT-bWLctRQ0C>MvgV(CFLbk%UyL+cZCT|jE zH@g##Rhc}s+&B^zAGF&A%-%d0Wao!U=|HhFQ_<)EyWX0=Fn&!pC&GV_SfvfJK|rLf zksY(#rLuNcf9a!=Q>A-NJQR6qJxrt7cr(cc9k^yb5#++)BC^%lbe1~Cbm9Vml)&U~ z^c1jC;J;Dd(KhiDVif2bCFcBZFK-u?0wxjw9@CN@?kIfF(>M0H#b0CToEaU^Zt*K3 zQ&ruaa#_baw>i@u=HKl8eEQr9JBX~s_scZM5EAd|_KYlsYz8|hnt@B)z5-i)h>QAM z3={2=MJ+DWUrwHT2On_+Y2k}vcJM$s}|k!ymyq!!f;eUH*Q{FSG(3GV@B2a23oCDh4Idu4LMZ8R zOT#MHWlCVBekUF9b{!pPFu(R!3|%NIbJk|CHrA<)>XY8mW4!gZ6h&sLvl9C-PlhO^ zqb`<$2uS=&Y)G35sDA~5n@HDm_qJ{p6szyp$DR=~!ifvnTQ-d-QebV*lUMe|i-3Ru9^b*$(^jid) 
znSE4DYTgs8@U@Zd?^>$XgS{tq;+@@ea(_=CYbE3SkpNWBx$vNUP%X1hL2y~I^{6T~ z4RL#@OXeleG%IoQ#%{NUp$+Dpc*vRN_iuAp?>r5B+G1ik&v&4AStVjUqpc5rf`&GF zQXCmfp1D(F@7Am#x$-=K&?|2H>_ER_88xzVQ=L~7?zOGG%a;}p>#Sy5G3a+65sOtI zR#PzI5BY82?orXiS69+-<{&Y7XVXrhCbzg&teP$OVe*Q-{E2ynj9WVJqE<{cu*4i`724isKkNfL+)uEw{{0gGzDO&99sN`@`~8mdQyX1~-0^O6_~}{U zzXyaf;`~UHwtRgoiXy(se(278XTN|HB2b_JnB9V0mCyCvIWjq+DlpCcl9*y}?i1Z+ zT)zQv_`pR=^ACc2}k_j5v}^WATgud7OqIF>E-M+1;npo*B) ztnSQ{K$PTn4E{E_PpmW_j=98oa0_IghHZA-2 z*KSaP2)~d!+W<4h2fYnJhLV_!6TKp!{`L1O6@pg}1B^+JVNM&yQt4`tLEK~h;<#5- z3X1sm+nFhi_q<583*}n$n=xQPOg&sKIQ&4I?PJIo< z30jUGA+Z2AYk383$k-FtamZ2dIA{_bYbC1jX;S4QZ$;krcwBt<9#^)5dTbRBZ)_9{ zhbBMF!68!pdc9JB$*Ro1B?3(>iy$L@u+0b3sI^D>?+{B^=VV>77U%`_Rr~+i0zU#c zy*fnIJkT)*hKW=~4iD0d|HLw#U9dieFZMT>{%y|!wYNz+WWN+ISCBNOX|4Pn6pOOj zxwVSXMJm<@;x%wl)tXm$+NKwu|BbqD1r8YrP>UbnA%n&WzSx|pK~KVWC=Bp)3B}2%-Oen zX^5!(8>>j3-`J2YOOnDUg;cs|i^yq}Jp0b}Y1YS??5ki&FY~;CCh*(}!js~fDA_`= zITM=*oWRh@nrWM!62|evs_5G`nPd@hsdX<^uZ{R4W1EiK4KP~fm=>~aJ|qW{2h{^- zr74haMwDp#gw*97J?vtqdK@k|dl{}v!zEg95LV67 zu`&?wFEQO)t~B48g= z=+AeY-bk@1(3>ujN1TiP#zn(X z6LrUBS`J+_%Z+N+u*bBXq7~moKw2S5uMA?$RbWc3@Yt@`bdn636aiYAp@vNe^NPwb z;ju29K}qM=knDS%PT7CX7>EN~u7ehR13F)cqp>>>86Jl7W)?;VjC7-+bleOENf;B* zh>HgV%F&zr4)$gjWMQGL51ljpTjY}m54(IG^oIwiH@cyS9H+_3x~~s)<&LeVD|?Rz zxI@{eB0`r^3{%0kZ@tHcG50ZlTgg_O;ff} zPGt3kf8gNP48H8*>bgUB9uMd6jI%gXXkg1lRk$DH~M6mmAviYj~=7XV;@qwgEau#%nr8g5M)(C+ZEBEgq z^W!K`OY(qsTElVqfu=zfY$E3&vJbt_OcWw}{WREM{iCtgn~Og4AZ=xvL({!&cVw*N zD58Oj+$ww$j!yqwSfxA8lU)IX*<+qSuwxnl5HqQ7uthC8>V+Wrjwl!8di{i8#pCoZ zEi8$V%gbh@O(Byav%1btilkfMPj3M;OO2DT8WLbAM|P;+A&@+?hBcrTU5k9mV9X=W zQS5k+)-Yx(M2}e&5%q4H4!WH_Z&E_3O|{SBLlnkJ`?C7q?w#0d?h^P=tmP;?3+hYjAwng{+qec^I-*S%fofYqCk{eME?r`KBZN;QMr8^7;QGFb8g z3TpRe>*(l3F*a4L&H`#ujdg+JXi|q`mh>nsgtGhCn?v7pct`F#nQjTG4o?N98@&Wx zBQVqhL}QYdHHQ#Q9N&(p@4c>C62jqG-@e{+j-0RGqvP^PwiD8^3BQ!*4%~Uy^|T5M zwjKn%vr%9I8oZv?G=G*wkAa$5n?qSCdbJ)W-M(iS;I&+;=wIo)(i~in3kSg|rlwv@ z-dbFssL2ceUZ(GLOyjl2_HwYQM%?_k6ak!SsR0Fbh$8uK$1tRsbcZwKoVcy_Y>sdm*NRdDRy!* 
z^(`<&tmy)+E*ZoBJFv{*JJ<(QU5Bk5u8u3iWOoQT`?flTh|?vZ?>Y)i+X^DEc zqNnXtR}3O{({XWRDD=qs$7ez`8XK83O#a>gtR`Ul0!E!+fSfR9ZGABhYH55Q*AbJ3 zVOgluH-;TRT?I1NwL8lkk~BphSp9*?GN_l!N*D*Sf?a@juDB_tJsIPcT4n^ zAh&dzvF~41<9ZWYMv-Op9KCeY>{aa_0_HD-^?JE1+`|eAgrsQd+o4zKAKzB4V(cU})u{0za-gR+^cRP+<596@FB$@xcWmQJyCVMZ z$E{zT`O{K)tJ$|-bziutcYtN|?YDeKqPIMh6hCrg>+Q#-CoK+b|19UaPZvgebA^YXGwa$xxTrM5FaX>{zY`gV^@ZT$Uo`{A+Oz!pWJ+$2fXB{&q0s6$vu9#3PInW zfM1t8c)Wgck0j##8|^uM`G#`J*5Y)eW%ZUlGP|@69IrorL14QHcyprq#~-yHdYc>D zB@xt}XHJs%nUaX_r@lS`|8DmeeZ>fPi*jBAQDob|#{cEJp{*G%C*X&Ie!p?&$=|Y) zc{{4UjrIQcw}69k27Hw?jjK15wRbxm4@5Al&$j$D@XZ%|7UzJ@Lca;#uKy@~&(lrd zAVrN`8HnsZIce(npTo^L2Jb(fq44kyRw+B_)bW!Myr*0C@95rG$5$Kw52kT)&(DZm z`8;A;7Nyso=sJM^3;G^w%^K^jt@>^E)K|yxcEp&P>K#$9zJpGxs@Kqudyt|>+itL~ zJeS~n??16S(tq2FtHK^YT51_1&9yR4nN7d=sd$p|htpCuQR0e{bBYmpFa*t-xD*;| z-!LlFdz#wEKENFsYA63|MYWC|o3=}dq4vo22xL{_OVm5b6;=J7 z-eg8y?1M5n;WN`!UZq|8552h@ZUuYH8ut~eFZ)`h{;)+^$8E0p=^O)grIkp~tKXQ- zy_MsN-zEBroIZJF^vbNJ?Sjp?cIIS;x{_e0jNc?wlPdV_?(bKaN77F#?;ASBQp7sTQGegukshw6zwH~stN zq2nii`eBa@^zXRTE$mmfuo!UP37;SphTZ76azptJ^w%(y0_Scby#ep?XUoCkCy$@J z!La?jthaCT}Tp9`ygG$ToNH8iLg7~b_4g^OyUf5tV1Or$b<1v z$6N(BvjfM?t1Mt6XWD}YZf4uirkq|~iFP7n*E$VQ#j-r*5a(&YH0vrfyz%;~TmKSG4eW zL3XU-{y}21(?H8&m#+@4-buR<^43k|$j#z3V!j{zL z9>@gv=Odc3i<}hC(7$6_rsmj*_GaXbsabMYFuawh_XfVHI7^7w)R80PP<5Z6Rd_g4 zn_b8=vPoMVH?)u&`mAM4DUb=;t zi6nySLAv+;OH1Fi+rRj!-Frd%ZIjJ3e^U52<975H`Pv)6(!!uui((9{s#WVIy_xqW;2HBvOQm#4DVYwNcryIya@d+m!Ow1dcLU{ zxhdUQ<&_qNpu$MgQ;>XIl*dDY4W<3SI}$BKtH>Hv#hRyw97_Baxea^f1$2zdL*@*ZF+T`CW7UuJ!vp=L{4do(X~F zIIT{}zdK=_sqa#JvSgAGR#L&CaQk>(K5S%#3HL3_x^l+Q6^DzIBx|O9e#yZ4AcaF_ zPkhGla_A}5T3-P#|H|Or>59-=q@*7ew^*$7~Th#{gs**$|j*JMYkJH(T-IjeHHvo(IMpI{?<3vxlV1O<&blEOB zw-jqsf;I$7rH6JEv3awY|3Xl2oP=riiSH;#{NS}{Q2)q0>v!00t099*Qx7S7gS-6V zNrB0cX7!wni^-j=mhZ`fqEbpCe}a5nTL8Z0GROk4$XUtN1>;r%hkuyVHRv?GrJOE4 z=!Y$cQE4z*bLMD;cz*0R$R71yNOu%)fJ0N|xf{irRbHZtfv+k@3T8$SEP<>_&EP&? 
zL5TO`rI1g)F@56%vlbM?5juA`;Pd_y=y2E2ZFQ%BE)uIJXgpRT6KG;b?EuB&v>;YW z+EsE}idu8sAf|xnBIVJg&6}=XCb9=(Ax2u2TN80a{r!4C^m_PPu5jo)u<>*|H|tSa;tFwx_h*CHP0cD8rRj>F6YZ(jA%wM-dy7+#m zFnb3%R9^K{Y}pKdctJY%t01Gnb+U=3-x&nX$=l6>-?eOUg(%QoM|M=D*5VX#cD|YvK0qkRdfq zrbpcM24>AIsR!SZl7@S$Z#!>X1s-m-?-_nGzfT}Mx`VWA-9Hahi%ra{Hsk)FTjpJ- zYo{ZF=dSCst|a$GzH?k9dvm&3xr!2CC;3#?h>iL&8kNB9fiTM7VXPB6IvG3&-tSec zGi#fac+0mlJ63D%TF+71TQOOdk=_a(a{WQp(@;3fE-P5ddnJ1C&5&uXRdd4Zz-aW8 z?9PL#;hcWa!zYOEXld}s(r0&@?g^!GPgC{dea~H=jp5`1j~lO!g}RQ7J;#s4Py^fj ze-o!!eZ>*1juX*#K!|0DS~wj)Kq&l79LJ!!gU8!2EgO>l#cG3ma$X@_{tPQD72e~t z*_Ar*Xp_O=72W8sSyS*2m)Z%3pixEPqC4Y|CilKf9ZT@#I8uqeXB~YOX%Xuu&-OVZ zl^jrSj1RB*6LtE{YI*9{Hm3rG#3tY3sr)bE3Ug*PmB|Pf2YgY1hp%eExL!W{Y`^TX zlll)X2u0$B$VRidsVKQm5GB=Z{&%Rs>3cZ^K)?}faD{Y<+1jBt-~4RaWLFiSbXm=J z$3g6+H=DT$6Ac~Bap>|2r^o>49z@jdT@;ZbZy-X6wARXdnEI2`dI2}i`_o4RH=cPs zA!J$vCMn7`rJ;nfQT92}9X%Xd8J!GsE4lU7*nCggI*>17$V2mxVv~KO*?GL6|4BgH z#-#)E1{7^bZtAtWXRg55trsIV!ZMy{2B+*B-E2hm{V_1vwl%JB>$yE=*VTib+%A<8 zAUpjoid-0FE>jryQtecFPWIJW`##qS{lSPZSx87uy>OBDm;0WVt(zrvjW%l%4@#&r z(6W1^w?`t$<;HJPfZz%a;x0~VLD~uQYuKJT*z2v}J$6-n)&)aY3FUZHbdjc*gO z!;92V7!4F}5xLXJ{JN3cgLniDi@k@7Bf&Vlq7wkMGFl;S9H0U>+u+(vB}_#cq z7a+{k$xHk;V&JEJ2Rx))6m9u>wB zjOw+(jiTl_oShYWLFZR0B3aZY>AT!Rfs(7wj~nLk-mulUX`L=(M_l2~r0<&aL9JU7b;RQ&(l!V3SAD4B4hR>j8f$5SKNP{ujNV2r(YxQYYoz!@tX@9NfS~V zt^wY}kfg~rZJ7@t|aa!Kchc&G_4C&e%bdbmvO@V z)n&o@V?Vos`ma^H=}!?&FHMZp$i*l-H(E%LEP;H^DfBCfp$`rj2Ze8{_b4cMdgo~| z+B33EX&-7Yf-49XFZ=k3hS#N5>kE#<*V^Jc@F{?3b>=@ zil3)zwvjFs{m_{VxM#J@&y9_iW9R95F7>5VbBa1}i1K4-LEE{PqCRZ$!=DXc82c}I z?JEJmHUT5%l_Vep7ZsL=ZpP4RsW*E}@wNLC9cDZ`rFJh}dEo8qR|LLPr>Awwc}#xZ z`Z*OzT6C*y#bCqJX6?%32K)0iyV4JNA9jM*$T0fWhdKH{M=#~?o|c-RI%Zc!-8BdjKU`m+lP4*#iO2Fi6m^_>@PHJL?2cKFRZJZVo~{=0 zbKw1nm#_(aON#J`a?9^!`d=7A1Y9d3vddZP&@8{GjXx`ibs^CDAe9LHyVS{X$??KM zmnL03M9mt{sU?X^z%Q&ZrFJ_>NUKX%fy9Y()9SV!y@VN(Blxl}eCku(y;hoidYh@5 zwn_tB@~SWcq}lm_P@T(UX`w@7=(MB&yt$oj zO>hsA+w{7oEc=+>BYnWet+S{_#h4I)LWwd@FNNMuc@k};+r`MO zoq}2Qnm?9rc$*>ZvOoudN1!_tE= 
z?&f3z7X4!GBT-XZ!tpILEwZ&x$Ias2l|x~&+KCeV)d#0bzIN|TpOoprQgG~kFI3ps z>+0Fq{>%;H$f^dlUMbqgaljS4#ba7@l&q(+{PLNs*=|yde|mwqP9y!n%cI+U`Z{Di z3liT}&9y$19RaJ^^5Tx zzmeos7XO#}K#7aQK2ud5wN8|yL5n^iT@%qW#lSyopTnqsq1RNVkYWvCR+-USlMBaQ zSW`o5HB(8k`?IaxOTiq}Qux;^7LC`vYXC;iS2HJsY7tvSHQ61l^ zaB#qAMHjiiDQ~p!+M*=5!$LC?trNz(-iN{v)e3?q)6uciP`sB9p9`N!KkBUltI_Qu z9>7#(FTuJ>&n-t8@R-N*oSnfx>$}ZS3GqZgXK16{p>5pkMTRnobTFF{4rYYwWXcID z(d!3MAc=!R5LmZ*u*P|%WGM3uO6n3@3`J@9cApwCIfWJ?zW|SqsV^p*ldNZCyfL80Q@2Olq!^ADt$k! zTOgq^XEA*{8BR$n{rZQsZ)(#@|D88O z{%lkUD_~8nMvqlx7tzrp{V#SuUIWwh#+{`Xz?3pX+D?5TUiTVDq4P%JP&}n*`pY52 zZ1E<>R5OZ@&T@aPb1T~1Z-`qWP^RYDpsBRW(B~Q*Q6+mjX&31in3@@y%7$BW-)U0Z z2hcD4_R$&W1^68UE^7VN*}l*oL{M&Us}>4_9ng7i0#jd zQBjZGuk&y_Kqm|waXni^HJN}ubcex=fq?jS!H?IorB+@iV6#<$B3cwg^t-`6m!J!% zeBbgq^Rbp3Zw3XjwsnokTl*1DU3?f=%;kezi zt#F2U^rA3xt>&yrGP>MPv5k5#F)b^et-=ne#fSzSWy)ic^LJ4G*&c%MOlP`mNPLZS z3I_Sgarj=-v9VWCU%j*oD7PC3v3P6(T9&s+taqLg1lP#ZhXq!O;prL%dkD`+q{`L`(mpvjvbjSow?c{9HmlOyV!f`_9CQRcF2Ja zh^Y=;eoyfR#BH7_BT_^SXlH0{wwvXNS?!%rH9fyB8CzqsUM+s4Wv~IC2wd{?%}?Rk zuTo#0vpb9$Jkw#x&UoZVt(fc3cXA0okKLCefvzuZD z(--|7K3bp0ff%e?s~FdH9a8r2t}FwAL2y4bLP(sW;wq0t<(Arp)UxEU{B}WmT)WM5 z@w2D3eTAxoX{Dy3fQ4Mo(q&aAW(cg$HF~0B1`8gF8%STz#gw4ycpuSTaagj!WYqD# z+$xNr)kQJZY~O-~Kb7Oy1ViFm9d~C6-hzr}nYI zbsnwc11x^7OSu)@VfS+15=cZS1$A%_1iPZ?no9Fb?yiwStuR@K;XBY- zNPL8$2@YRt?+_+w6dR%)a#i}iWVlFe5b5mrQ_Uh^qcQC7RVzJ|k1dm9q+M!(!hEU+ zyiLb?Fokkls%46s3_WblgCl!Ir<5?Z70Y}4*2HDp2MK%s8A$-0%=fjA#~&c*ur&gd=AwoBvb7`-6GXs=($ zQ=b%I(FF#O_#A}h&Ue7V0_A0uvp z=dNdHon~@r%U_AG3}+qXj=c6bPRP*Hi;#HYll=z!@JC;Zjw7)wc#8>!-Et#A{58PJ zSGMx~A+SZ&dZJkqov-GX*&B!fbU!^5kBm^0J=juM-gtxqxCq4V<}c=JdZyDGD!}kX zFtl@4&fE|4+ZZ%W(48UteqYc$i*qPQO2S?^_}XCCmk#7+TP~6XQvTr0IbRWP5tom# zySbQXDHeO9$32`phR-jMJI4@n_aon!)<(C=6&%NVi!WAsxw7Q4?(5))sFN0@Uzj%? 
zJVjhsp!c^kpqc70_|>Y(%WiMxKSeuMxu(&)3%rYv&`;wC+_Cx>GVq$s%y%xryn%L%X8U!=9BqVa3~6S zFWKjzm2qMt?UbCun=}WouU%M}ATi*-b4%ZnK)n$zogoeLB#r(obtA3+bP}KoPtQ~Z zlm+lQ5-5HGsJiqZ_#}`-&H$IM9}H5m^uIc;k1D9vc5%mV^qM-Bd<3mk(H(lf!)o;F z(#Q4rL11W2n2AQo2XODKx^4PpEPP+Bx^j8jZ@r3^>O!&%@x8A)t0ZGlRv#g!9^8 z5nI=`>0*#r_m}9p z0(1h%>%m#G;8klraxDTat}k^lhNN(q83As*EO=v+fHk0COtCw(^W^L%>clnSG7m3d z;mur^^b9i|`@?&WcuYd%YmTdGQo6)A*FTbr>(z-{rb9TG0APQK?Em&DfIEUjBjv)8 za&iDJ3V?CgzikV$6a+8AJAKl}om-T33MmoQ|5P9jCEdm^(eTUY2tySe5z2J$1Nx3H zeiL~xVwMQW)T`_L{BX*`Db&0|29>t{0dV%I(-@%RcK&z#LbeUU@kl^iBQ)6ZeR{GS zzm5%3t}IydNwItH3)Xznd>veanz`W8gbN%(qg_f}A@kdyMr}h`Zg^s8V%=clcwq6= z2zko#`Ct5arpICcoXgZO$x0Xm^8(yt?7#4@GnT`!s0WCa-x`+scyTLisZDdhcbYi2 z5Ujh^-fP68@0xXP6Pk$th{1`no zYc}t&_i9lI3>;^b4O>?te4CvaAlMa@CkD_07xDXZkzruFI!kCIMHT2N+(y?=R7yRt-ot zuMy6X9*|!HvRZgk4v-Rnnfy0n9eQ(OTjeh$I_4?tQX7Lz{{(b@HvsfkKNw(^)C3&>PP874TS08p!V0lb~gE&tYE zw)p^7Y;JZqfw5W+685=OXXsx%A=*mpfwKxQZazXCGS6QrCP0Ftebs=WA1P!c$y z_zmFdXLwWcE};K?z+(XNMH=v=RU{==^Q^1U81pYrXtC^^;pg7+>4ah%?#d70Vz^D@8Xz^Z+m9MpfY9@2R zJBHtwD6gU-21pYLS8paVgYB*(FaNoLL+K+GJ(p1o125S6gROwHSB`yh1VQQQ_XuKd zo@&s9yn}#R>+gA*5Hk9xlwF0#KMPv to set extra flags for FPGA backend compilation +set(HARDWARE_LINK_FLAGS "-fintelfpga -Xshardware -Xsboard=${_FPGA_BOARD} ${USER_HARDWARE_FLAGS}") + +set(EMULATOR_COMPILE_FLAGS "-fintelfpga -DFPGA_EMULATOR") +set(EMULATOR_LINK_FLAGS "-fintelfpga") + +# fpga emulator +add_executable(${EMULATOR_TARGET} ${SOURCE_FILE}) +add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) +set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS ${EMULATOR_COMPILE_FLAGS}) +set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS ${EMULATOR_LINK_FLAGS}) + +# fpga +add_executable(${FPGA_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILE}) +add_custom_target(fpga DEPENDS ${FPGA_TARGET}) +set_target_properties(${FPGA_TARGET} PROPERTIES COMPILE_FLAGS ${HARDWARE_COMPILE_FLAGS}) +set_target_properties(${FPGA_TARGET} PROPERTIES LINK_FLAGS ${HARDWARE_LINK_FLAGS}) + + +# run 
+add_custom_target(run + COMMAND ../${TARGET_NAME}.fpga_emu + DEPENDS ${TARGET_NAME}.fpga_emu) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Tools/system_profiling/src/double_buffering.cpp b/DirectProgramming/DPC++FPGA/Tutorials/Tools/system_profiling/src/double_buffering.cpp new file mode 100755 index 0000000000..9884295b08 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Tools/system_profiling/src/double_buffering.cpp @@ -0,0 +1,353 @@ +//============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +#include +#include +#include +#include + +#include "dpc_common.hpp" + +using namespace sycl; + +// For the system_profiling tutorial, we execute the kernel only a few times. +// This makes it easier to examine the generated profiling graphs. +// Note that the performance advantage of double buffering is more apparent on +// FPGA hardware with a larger number of kernel invocations. + +// kTimes = # times to execute the kernel. kTimes must be >= 2 +// kSize = # of floats to process on each kernel execution. +#if defined(FPGA_EMULATOR) +constexpr int kTimes = 3; +constexpr int kSize = 4096; +#else +constexpr int kTimes = 3; // originally 100 +constexpr int kSize = 2621440; +#endif + +// Kernel executes a power function (base^kPow). Must be +// >= 2. Can increase this to increase kernel execution +// time, but ProcessOutput() time will also increase. +constexpr int kPow = 20; + +// Number of iterations through the main loop +constexpr int kNumRuns = 2; + +bool pass = true; + +class SimpleVpow; + +/* Kernel function. + Performs buffer_b[i] = buffer_a[i] ** pow + Only supports pow >= 2. + This kernel is not meant to be an optimal implementation of the power + operation -- it's just a sample kernel for this tutorial whose execution time + is easily controlled via the pow parameter. 
SYCL buffers are created + externally and passed in by reference to control (external to this function) + when the buffers are destructed. The destructor causes a blocking buffer + transfer from device to host and double buffering requires us to not block + here (because we need to launch another kernel). So we only want this + transfer to occur at the end of overall execution, not at the end of each + individual kernel execution. +*/ +void SimplePow(std::unique_ptr &q, buffer &buffer_a, + buffer &buffer_b, event &e) { + // Submit to the queue and execute the kernel + e = q->submit([&](handler &h) { + // Get kernel access to the buffers + auto accessor_a = buffer_a.get_access(h); + auto accessor_b = buffer_b.get_access(h); + + const int num = kSize; + assert(kPow >= 2); + const int p = kPow - 1; // Assumes pow >= 2; + + h.single_task([=]() [[intel::kernel_args_restrict]] { + for (int j = 0; j < p; j++) { + if (j == 0) { + for (int i = 0; i < num; i++) { + accessor_b[i] = accessor_a[i] * accessor_a[i]; + } + } else { + for (int i = 0; i < num; i++) { + accessor_b[i] = accessor_b[i] * accessor_a[i]; + } + } + } + }); + }); + + event update_host_event; + update_host_event = q->submit([&](handler &h) { + auto accessor_b = buffer_b.get_access(h); + + /* + Explicitly instruct the SYCL runtime to copy the kernel's output buffer + back to the host upon kernel completion. This is not required for + functionality since the buffer access in ProcessOutput() also implicitly + instructs the runtime to copy the data back. But it should be noted that + this buffer access blocks ProcessOutput() until the kernel is complete + and the data is copied. In contrast, update_host() instructs the runtime + to perform the copy earlier. This allows ProcessOutput() to optionally + perform more useful work *before* making the blocking buffer access. Said + another way, this allows ProcessOutput() to potentially perform more work + in parallel with the runtime's copy operation. 
+ */ + h.update_host(accessor_b); + }); +} + +// Returns kernel execution time for a given SYCL event from a queue. +ulong SyclGetExecTimeNs(event e) { + ulong start_time = + e.get_profiling_info(); + ulong end_time = + e.get_profiling_info(); + return (end_time - start_time); +} + +// Local pow function for verifying results +float MyPow(float input, int pow) { + return (pow == 0) ? 1 : input * MyPow(input, pow - 1); +} + +/* Compares kernel output against expected output. Only compares part of the + output so that this method completes quickly. This is done + intentionally/artificially keep host-processing time shorter than kernel + execution time. Grabs kernel output data from its SYCL buffer. Reading from + this buffer is a blocking operation that will block on the kernel completing. + Queries and records execution time of the kernel that just completed. This + is a natural place to do this because ProcessOutput() is blocked on kernel + completion. +*/ +void ProcessOutput(buffer &input_buf, + buffer &output_buf, int exec_number, event e, + ulong &total_kernel_time_per_slot) { + auto input_buf_acc = input_buf.get_access(); + auto output_buf_acc = output_buf.get_access(); + int num_errors = 0; + int num_errors_to_print = 10; + /* The use of update_host() in the kernel function allows for additional + host-side operations to be performed here, in parallel with the buffer copy + operation from device to host, before the blocking access to the output + buffer is made via output_buf_acc[]. To be clear, no real operations are + done here and this is just a note that this is the place + where you *could* do it. */ + for (int i = 0; i < kSize / 8; i++) { + const bool out_valid = (MyPow(input_buf_acc[i], kPow) != output_buf_acc[i]); + if ((num_errors < num_errors_to_print) && out_valid) { + if (num_errors == 0) { + pass = false; + std::cout << "Verification failed on kernel execution # " << exec_number + << ". 
Showing up to " << num_errors_to_print + << " mismatches.\n"; + } + std::cout << "Verification failed on kernel execution # " << exec_number + << ", at element " << i << ". Expected " << std::fixed + << std::setprecision(16) << MyPow(input_buf_acc[i], kPow) + << " but got " << output_buf_acc[i] << "\n"; + num_errors++; + } + } + + // At this point we know the kernel has completed, + // so can query the profiling data. + total_kernel_time_per_slot += SyclGetExecTimeNs(e); +} + +/* + Generates input data for the next kernel execution. Only fills part of the + buffer so that this method completes quickly. This is done + intentionally/artificially keep host-processing time shorter than kernel + execution time. Writes the data into the associated SYCL buffer. The write + will block until the previous kernel execution, that is using this buffer, + completes. +*/ +void ProcessInput(buffer &buf) { + // We are generating completely new input data, so can use discard_write() + // here to indicate we don't care about the SYCL buffer's current contents. + auto buf_acc = buf.get_access(); + + // RNG seed + auto seed = std::chrono::system_clock::now().time_since_epoch().count(); + + // RNG engine + std::default_random_engine dre(seed); + + // generate random numbers between 1 and 2 + std::uniform_real_distribution di(1.0f, 2.0f); + + // Randomly generate a start value and increment from there. + // Compared to randomly generating every value, this is done to + // speed up this function a bit. + float start_val = di(dre); + + for (int i = 0; i < kSize / 8; i++) { + buf_acc[i] = start_val; + start_val++; + } +} + +int main() { +// Create queue, get platform and device +#if defined(FPGA_EMULATOR) + intel::fpga_emulator_selector device_selector; + std::cout << "\nEmulator output does not demonstrate true hardware " + "performance. 
The design may need to run on actual hardware " + "to observe the performance benefit of the optimization " + "exemplified in this tutorial.\n\n"; +#else + intel::fpga_selector device_selector; +#endif + + try { + auto prop_list = + property_list{property::queue::enable_profiling()}; + + std::unique_ptr q; + q.reset(new queue(device_selector, dpc_common::exception_handler, prop_list)); + + platform platform = q->get_context().get_platform(); + device device = q->get_device(); + std::cout << "Platform name: " + << platform.get_info().c_str() << "\n"; + std::cout << "Device name: " + << device.get_info().c_str() << "\n\n\n"; + + std::cout << "Executing kernel " << kTimes << " times in each round.\n\n"; + + // Create a vector to store the input/output SYCL buffers + std::vector> input_buf; + std::vector> output_buf; + + // SYCL events for each kernel launch. + event sycl_events[2]; + + // In nanoseconds. Total execution time of kernels in a given slot. + ulong total_kernel_time_per_slot[2]; + + // Total execution time of all kernels. + ulong total_kernel_time = 0; + + // Allocate vectors to store the host-side copies of the input data + // Create and allocate the SYCL buffers + for (int i = 0; i < 2; i++) { + input_buf.push_back(buffer(range<1>(kSize))); + output_buf.push_back(buffer(range<1>(kSize))); + } + + /* + Main loop. This loop runs twice to show the performance difference without + and with double buffering. + */ + for (int i = 0; i < kNumRuns; i++) { + for (int i = 0; i < 2; i++) { + total_kernel_time_per_slot[i] = 0; // Initialize timers to zero. + } + + switch (i) { + case 0: { + std::cout << "*** Beginning execution, without double buffering\n"; + break; + } + case 1: { + std::cout << "*** Beginning execution, with double buffering.\n"; + break; + } + default: { + std::cout << "*** Beginning execution.\n"; + } + } + + // Start the timer. This will include the time to process the input data + // for the first 2 kernel executions. 
+ dpc_common::TimeInterval exec_time; + + if (i == 0) { // Single buffering + for (int i = 0; i < kTimes; i++) { + // Only print every few iterations, just to limit the prints. + if (i % 10 == 0) { + std::cout << "Launching kernel #" << i << "\n"; + } + + ProcessInput(input_buf[0]); + SimplePow(q, input_buf[0], output_buf[0], sycl_events[0]); + ProcessOutput(input_buf[0], output_buf[0], i, sycl_events[0], + total_kernel_time_per_slot[0]); + } + } else { // Double buffering + // Process input for first 2 kernel launches and queue them. Then block + // on processing the output of the first kernel. + ProcessInput(input_buf[0]); + ProcessInput(input_buf[1]); + + std::cout << "Launching kernel #0\n"; + + SimplePow(q, input_buf[0], output_buf[0], sycl_events[0]); + for (int i = 1; i < kTimes; i++) { + if (i % 10 == 0) { + std::cout << "Launching kernel #" << i << "\n"; + } // Only print every few iterations, just to limit the prints. + + // Launch the next kernel + SimplePow(q, input_buf[i % 2], output_buf[i % 2], sycl_events[i % 2]); + + // Process output from previous kernel. This will block on kernel + // completion. + ProcessOutput(input_buf[(i - 1) % 2], output_buf[(i - 1) % 2], i, + sycl_events[(i - 1) % 2], + total_kernel_time_per_slot[(i - 1) % 2]); + + // Generate input for the next kernel. + ProcessInput(input_buf[(i - 1) % 2]); + } + + // Process output of the final kernel + ProcessOutput(input_buf[(kTimes - 1) % 2], output_buf[(kTimes - 1) % 2], + i, sycl_events[(kTimes - 1) % 2], + total_kernel_time_per_slot[(kTimes - 1) % 2]); + } + + // Add up the overall kernel execution time. + total_kernel_time = 0; + for (int i = 0; i < 2; i++) { + total_kernel_time += total_kernel_time_per_slot[i]; + } + + // Stop the timer. + double time_span = exec_time.Elapsed(); + + std::cout << "\nOverall execution time " + << ((i == 0) ? 
"without" : "with") << " double buffering = " + << (unsigned)(time_span * 1000) << " ms\n"; + std::cout << "Total kernel-only execution time " + << ((i == 0) ? "without" : "with") << " double buffering = " + << (unsigned)(total_kernel_time / 1000000) << " ms\n"; + std::cout << "Throughput = " << std::setprecision(8) + << (float)kSize * (float)kTimes * (float)sizeof(float) / + (float)time_span / 1000000 + << " MB/s\n\n\n"; + } + if (pass) { + std::cout << "Verification PASSED\n"; + } else { + std::cout << "Verification FAILED\n"; + return 1; + } + } catch (sycl::exception const& e) { + // Catches exceptions in the host code + std::cout << "Caught a SYCL host exception:\n" << e.what() << "\n"; + + // Most likely the runtime couldn't find FPGA hardware! + if (e.get_cl_code() == CL_DEVICE_NOT_FOUND) { + std::cout << "If you are targeting an FPGA, please ensure that your " + "system has a correctly configured FPGA board.\n"; + std::cout << "If you are targeting the FPGA emulator, compile with " + "-DFPGA_EMULATOR.\n"; + } + std::terminate(); + } + return 0; +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Tools/system_profiling/with_and_without_double_buffering.PNG b/DirectProgramming/DPC++FPGA/Tutorials/Tools/system_profiling/with_and_without_double_buffering.PNG new file mode 100755 index 0000000000000000000000000000000000000000..dffc959919ea58af03c48eade765815b8e69dfe6 GIT binary patch literal 91631 zcmd4&WmuGL*9Hs&L$?YdAp?ShNGJ$M45@&m7$7wuE!_+-q=3>TNDoS=bT>mNA>Ccl z3=KmJ47?{?_jSkf+~4;6`L=g^f1o0x$9b&S*S_|(&W{>u3KXOaq&PS@6iSNnnm9Q4 zG&nf8`^1F6-$a?I;R3(mI%z7%;uQ2StpVTQo6D%m;NTQTlARe50N<0?E532U!MS<& z@(Wi!b1`HDkRHlkyN$S{?$$}i`lv0qI$PkpE0Y(j!~cwO4N z4KtePp!YPBji)DJi#a2dj{o;^ck$;5y$}N_=|6vL z`H+U@A0K>AJz&Dc_0NBo*`cAq^7rQyV>1xae}68DLr%u;pPy3=^2ht@B;dnIJg&Pxz9i;9k7kn~=+@8C3 zQt^vwPsC^64mEIbh&-DngUflVg4T~JmbzDhCpmxdgE4anm5)zRXzp*PSfrPsR50du z&5+wx)nRydWF5aJ-^m$3-e-+1#x9~qDS=QE@a zWLfzAI^~RlvGGaq{d0RlIAfWug$#{`;ux&aG(u;G5i{fo&vefux 
zkFL-~aoZ*ldW1}7CogM(y`R5MuRtAAu}B|a%6b%SV1s2_iWU%GWBkyq7($Bmul-P| zbRK+5aqknoDPcx2=v)GLePt zUKmUL1#({gcv`*s`7#CkeJ?{is?h-?PtiApPz5RBo36JoI2^B&QBDil&Czjl9pv(B zO0EyqHKHvWE}!H;f-U337h`{WD91(NE7BWMS$7EdArzX3`49(%HzupY8v#?GWgd6y zLKCwyL8?US`&N~#V0mKXo@j(~kM~6ykZ(~eh|9cVx1VZM98IW|NhW+{3!#Yn>s-U7 zk?rh9G;`Ik;T+iXIW7<%ymkx|4uf?T5CjBSb9H}RgVU{q-d}|(aQWYbf8YHGgGgD< zFI0Ik;Fk?TpOjBVQLT1?Mk5y#S2i$g=&ML1{3{YEN6xKq-XlmLZWF@3z0-`1{Ox;$ z!L~^vMIkgam|?o_%xKFQMO=f-iaJY=t>`{4X}I^&KTe)J>|FVyl?}PGqy^qtegWP= zMS8+D)b>ZTvboo!2mLT-6E^KcFHM}Vmp3ku(!995vW4^|x+tpj;SkH*h^UvLl_wI3 z%H?NyF(qVYNc%5S+p-V|K`)QBXj9$)0vP-3_m!oONI(-6PzYURcTf-A@zT-O?SFXu zzufxRMmhwzv4W&Sd?y*YT*fP@$! z#D!ECnjPoRHmwxX#Vxyi2w-t`2_pQT2o%O&@%Y;O0ZT+E=mD{baLIt*}S?w{#BIy#1~N< zJR*)bsOJ%FEL@txc?Noj1iZaeLHAJ+H*I9T8$~w}G=YK+{D<+$(Ew!y#wY-BE)V)E z&Vv}4a4CxJoXEhhf-Ub3K_CU-Vgt0L!5rqlCYD_)dg;X#kOGH#MyN{C*}eHEdWpK8 zj`lIL6i~HoZQLFOucZKIo?ila|6kzwd7e-~8g#!9>efz#Ciojbg#Sh_X-bPL-0X`; zUVgHVi#{2?eRy*krJc5u_A7)qhrb7OYGL_4TZRLg^S z=hb3V+{{)crh^Ic& z-aocgwfFk^F0naR$eEHN3jk;DhE|BMW@ilqtsrJD;UWkBPh%1?hp*7FR5uOHo^>PP z87lv>sWE3*o zwaQz_xuKCCd88*K>IO%rlC3-JTk1ZOTj|pPN)-Z9IgP>m!Ib6kT>l1MS-~7AF>WE* zj-u0mcvdXC;%V$=Reni?kt_pjY<`!S3}6Z1UT-w(@r!gYu}*oNn{XiA*^fe)Os3Oe zOMV+wzBm9Ur)xz1V?leY6r)4@s{P*F6iaRd*{x~HaAU%%72hLtmS=x&AKJ3IwI|*l zt}_Dn%0$_%rc5u%{Kvt_=9;E*#zs~yr_LcdrW(tc#=iVP)zFtvxL@-5L-I4>z85TE z7LZ??!Ja+dRW7jIk?E<+vx5l>&`WYUa(I3fIYM!>Sb~P)70ubtxLc3s;P5qr13_n? 
zun46CA)Oviuw_k#9(IG5QCucL^VlMXH!nz|M(i=E1sItI=N*KuY(dM->1cE_hs=zlYLr_O;#-EnGn}g+xwE9; z<1+56Y}}L8@BlRBv=fic{>uQ5kMx}5Km6lCRBn$(I&M8A@>)LLctAMi^)Fihf4VGZ z|1Xf`sL_a-1+C5%%NzH-*LI(2ZdV^B%?%;In4x@hL{iCfyY+4W|C@{r#n`z_?EBFXI0vED&|5yDJ?a$1l9-s7VUcxi zzw_&A2{A2;^tvyBYf!*)enR<@!j<3x)_$hz-J5dE>JzpEn-3bWXDn9ZM zJ{oE`tu|6YP%b{dhSSzw=rWh&VP1epx1HTxCl%JAv+o2D3g-FuEr@X*NgpV?Is77` ztIQ+JW_al#qw&#;;j_ZCqd%mi-WHEPkVH?se%{1Su~eS2EGHddd|VC|)jAgX{L~=7 zg~0SBlcfqXE#VnvD}>HKPo|0X#?-@bOK4$X8rYtIpu2FJmX}JeshPl-e9cy0Cx}|M zeUjj+Ut*822Wg^V0d67`G8Kf;#DiWRC~x{0b^^+m2l(PeUt}jZv5OfgYCM z_tHtPh!jFwzCl-)GEtL%g-DLnJi-}$xMTHr)7*H^!p9zRFT#TP2;Cx0m<-B&a{ZKl@VP zPh{W_26+cQ(1QCK&(*u~0nvbUD2%LF!PHeP_3_&%o0H(6hesPK2=t_WAAdT+Fbrx{ z@*$mp?6iYiMSPSfo=h1ZW1qCnV4cht!b0Ld4jY_LtNgz68QG5>qC7mx0-Ujr^qvsJ zwG}6{j^yHj&L%j^GE8ZX**e zEmcmp(67M2t?SboQBH0CJXKZc4w*HV7nl6U#mPkkj^qMf=o?UzEINQ*@LR}%bSNC_ z-9bK2P-s++F24a=YAEB+ANF&qPCiSciMNRBhJz7$sGaO;1Qmm#2uEZ(_0B_of5tLY zxwXn^;XN)KTD>}it=;3f_`C0^_gt?yHStF|>JM@P_8>TZ3h|FeJ90bUkt3iwg`yBI z9N1JN&Gt03jaARW^V=ZBdUP*^M7>2lR{%mvvmR%~zac}tJwS`eYKL6oX@CP_J9#YX6UxniN|~Wtx-Rrj@QVTt^M-5$>B#Ws|{-wZ@rn zmICX&!0JtvTm~KGUqQE+^^xZDb-*2qn}5NStifLrh2oj)UqS;v#b0US{r@i&VsAnK zzSdKd^K`Brnl?%T|NR62&4xAfZg$E_@?j!?+$5BLi5D;%1Y!sC7f%K%%_XKUmizh3 zTEjl=U$Qu#zdYD(1tJI>yLw#37srW+qOTIFX&>6r$(2hJ;ktV}4&_sxJKrdoW^0_( z3-s#wj=+$LMxXsd{c3A3t%B*O+9R+I5>qgO(YT5JbKD=BQ!zD+5<&fDy-4+>vTk6k z+^XyImWRIgj#>5VwQcLs*L?NruG`jBbh3z@a994MBdYZYObi8;|1Ec?9=?jI6M_B6 zIn%Kj`1E6PwF)uW(W~~G6OT@V+xf$V!WSF%tp!A# zF@)T?+D5ZS=~b=%M~b6$@YcdIpqrFTt9@6QNu^^k>ys=wK9U7%T>K^VET`m+%gJuT z+pT4~y}J7rlbb|dCv5Q(3nK)Bi#s2kTwo4qJC@#yPJ`1P2NQGk22?3*n0%KxiPzQ; zZ7pUNq-Vjmw(bQt8?3bCZDsR`It!}Jcc~s}IM_Dy2xK0!P*<@5c{k{=S)SrqU~rH$ zPur$LX!8VPjb^BxJJWa?SzfsH=%?pUpz%<~3IejYdsv>)Tkv@P$YW>6Q&QMIkBDk( zbipN0friHQs3T@NIO17OkA9|0az%;JV0_ISSmtyFOO$0L`^9-Ji3LPNDSRI6xYc~u zayH{DJ3EJxw^6iSo!2mCqI{d>qtbL{<1wFpHHX!RP0^B0xh>lQr%K5kxxyJZ`*+0QV5T|M@PsooB1tI z0-WbU==dyNfNVEKFu}(7D@h8dey5S%?tFz=o+O2~NV1;Z(?k|K_-J463MpOq$wtx* 
z??NIfG&q)TOq6N`QGzWc?(|X+A68mV^0g?0l|XKpF_Y7a>8Yhbcq+GN5*D|(D=1=z zC%*hrR)w3sHfe9~D#61ukPcTvmS-w8$$BX~SS& zdR(BQe*?j$<}?G|9oHc@koRq|qcHbzUzgv5;#5`JrcDf=;JBP`(=M~-rL+c;luGpV zuC&yRl-x8je^@@Ktr5#q4lU>{q}nRqJ*0K-tXb&QP1j8T>-ab|Z=>gc9Jij+reY=~ zh4?o&XKrT|nS*(_3}4GsF4BnWHtMTs=dP1`xg+OmFV+XujMpPs*LIV^QoUoj#x&_us2s_P_8 zkVfx6@|mTkiDifEI`oN#G<7KACT{Mz#}@4aX}PAamFY7Nk3Z@9R6o_&OladpJ=w2< z6M0Ai9LZ(f0WMQ8TaNxRT2zfsKzq%8)8!2zz;LGyKaoDbw2O4(}Z z+;YL;)K1zf$r3I}`scpG6tu=|%C-g#w0ATpn=H2>Sd)%{CE@Lp(1c9`^0B&WHd*E- z6NfAYjluL3mTUAS9-A038Ia3?qH1X;SZ_W+KG^IU4GY5oMMqOy6iowT%q9uz;~0h* z^C`IupvD&a`Gf@oi==f26MnFjCUW+W#8XS%DqzH~@k?c{y$Y}$o+>rfQQwjLl~+52 zh2Bkh;x^1p6+H&hHR7fQmv&-U#6wqaepCNNj=yGc15&r{?2T}NnLn@k#^Q?^u@N^U zAgHABFbLP*!*S;J>rt_ve4c(tKXSxi&C%oBJ@EWE{wwC;WyP&#Ec_levLUiG+lRw?W`8-6Z%PLe-UuO zkfV5bc`6}H<|~WCA4xu|`kIoCKdyGI2CX;00N31gO9*(Su-Kf{88g7vy_Fg@z&5;9 zjngFaD`1QJdj*rQ??&2yTtuqq6B$;VBvp-Xw6sOsU*StHm<@aZ1~La;n41qb-_OAP z!q|t4|Bm4&H$x^~cTStF?;3^JG-8yXsU#jod*-jT88G=YGaNhRq?PUokq*!UGz5Kg z_1ll3Jj_)~zVR-*wdc}OAW3z^TVJ|D!;CxE`m!i&eV#`y6Ra0*mlD|4CoLZ(9_!1Y ztJD3`3m>HG|F0axOWU(wU8G;@`(6rn1o^}8qjFytmNVXa`>{!6*4x~iYwjb(WW~0C z-vuq>qMMLuYj~m1N$%1{HCc=%Dp|lGMjzvw^R&!n)Zlt-%tlwh3-g}wH$>XazMOKa zt&yjcOl7jNxg+_eO<(T1X14%UJMWt>r@SFZ;h5!KX68W=|VbWAZ{tM|qn<_o00 z;kau@xj}pqr)m`6<*FZ}_r**%cuMK|iaBQoWS_t}yhkNU%sDLHAl5w2b{s^( zjn|1#xx6S$8Dv!93su3XG8w7hg;M5ycK<92w9(#6_4B&~1f-;XEmEybEsVWaXNZls zP!U`wXZrV3?+}wxrH9(FC=qMO%<-V8aZ05CgWEpbdM`V%Eus0%4ddsaSa~L>BgXd4oLqL$@wEUcT?&$Qs}xQ zON^w)LAyw}d@!eaTKnc?<oj*Kr^41EL6Y>;6U}^1^fu;CZW(Xdtva4xq3RJTqLc`aC4^KbMJU}VS-JEI*3vIetI*QQ)gi_ zvD!dTYcqk;)G5nm*IIVwo>-Cte6P8CqW}C|D}bJ=+eSdi&NIrBllV$tUKRXt^b}1< zK(Jxy+a~N0ptPZdBXxMc4^Ae;A92n4>7LOx!SN6kc%Y1Lp<=0w0%6LCV(TXw6%&w| z9|uKe&3*{G*uHZEiW>)=&|K`Op9q2ad;!=C!sOP33ll_0yY?O*jc6y zAGDs2p8n0iBi848dt@6gzy*4>M(K-DQJ=X^2m&3~em^?pQk0;;Q;*cH_Q=mKH7}gu zP>;#_bu>FEVZjE_kea)korkHl`G)p;L-Y&Em635%#a5B%R8g0Sk%TG9C$3vF`=u1@ zO3uWuHL5r@PcE);lLV$y&JUE{ES#l^OW%P$VMac_nB(U9+3LPPU-w+Y9fq3miKeLI 
z(?{x}EbDe#1>G04?Nan>U1lS!!nT(M{=Vj!0o4o_*D*0^t%qs*N$zNtY3|70R;^E2 zq~}W+T))N7Ar23%4)so}+Pdu}phD>9*Zjvc$vw;CpgDU1YXi^}BE;QD^OkrB2r1 zkPv0l;iU4C3&T$)^s_FR-Lj3JA{pyFcLkD&j-lBC%8w&F>u*E z1UoyOqwF5JdZ7K_ne<7=g`$k#2;%aR#!+ZT*X^bojLt5blE{%kp-V@%7SFi1RJ9(r ziu$}-0HXJ)46E>EsD)o~!W{MZyz#mxPa_!EIy$&r&HPA7=UVf{536dHtI2N(D8ByO z=(BZ3us6N}RfP2#l$cF>e1~y7w85yHoP`$IZDJ^1TT8#8IDQa|>yH~RCavKsy1Nje z@476L-hjR2Jtdyo-rV>3qERB-is|01l~+t%JtU0Eg%$ihIJNdjr>QAFT|RPDM#EkE zX3X>SAoe3F*73>iV@$*6b3rbyJ_f*pQin@xlI%5{i4mal2jF(aVNB6LWPTj zS%uk!?Q1>yPoK(b@A>**==h;%TAG(Cjd}0YQftP%$Zqn*8?`7*ydklqPjtQ6?EfUx z^!UT`)=K4p0YTHw3h>hIpt);0J~6d^Eyvh6IW_f&-I&Rq$yg#e@$1ooS&M`R&TA8| zS|bYM+J<}I!KbsF67cDvkNe|wOKk)6@%n!q-{VyI6BF$Fp&HT0$*#30TT*1IXiFEg zXdf9RA>y4t_Y)tGMMe~_Ji~%ovNj(kV6>-5t}g#S-6e}JgB#namM*!?!^ILMrR}19 z{rYv&$-~>iRqqvj3&{j5_g>PAS>sQ9+c)AT{6JP@+Nq`V_H9fG#HoiACo*lQy|GEo zq-W}+g1IbVhDQ4|>+|0I^VNbgVI z{8t)Uyg8T889QZKie^6Gn2p&$6Z5xCc9vja}K~H*|3V z8hyLzNAqVbB8e!N?~&2*7N6eFo2}}=5K=PSx$JYplPkx*-+Zz)5g<#(E&q;?(%B z^K@*?*~_eEXSSAhsxaU4wCN8sdY*sf$vgIi->gZ-%rojg7>Pe3L}s4AZ14lK|1+fa z1|)&UFI|w&&#`P}?41iYuHUWjI3*Epc+dkK2I>m{ksrc$f%*j$10d}z&Nt;~8aaa9 zpHP2nQ+QjAYtWlkYY1L&-#ef`H6g(Nypt{gAK41snS1X36l8Yi8WSB@= z6JtPLLg{lSpEBIh3ed{C|1+6~*tl5iok7mEk(g z{S0)w*quF!!$u{tb0uXioJaxm&zkx!=FfN8WjG zRnKtkXRqmcWDX~gcpr`!Rb zMwSkKt~j8@u}*alku!S!ktQKd2WV$ZQ7vCNc7GCEcYVOQnGB&6a5-8Z9NnFc!UYj2 z*)6n~Mbgmv%Mb+01yhbo^@-R{eIOmROZ3T9SPm9>^^=)6%Rw`!(QsM8MnPkV(~887%SJ+^vL`TG&4&Y!`C`Gx=ZggWwa>R^pW_e z*)~WPh>Vl4qGpnl9%(F7aA=w>I!drUzPOG7vFQS8CQ9&5y+VCcQ;{=N=E|Z$L!b zT)5kR4(Jh?j(^zSR9Sqa<=$ERAQKS$`@K@EX;bsB*KxjMe%>A+phxjYH)LOycTtobbxr-=rMcSplrd@ z%&p@{yo{AkzsAH41N14*_I483fev*;J!7vNa_zQ)sJ!0y;R*qk#NG4ca__ zh*y4IlG5PyBuxU&F={&%lefWCZNK(xy70oCoE^_?EGnbLba!B>_1+*?@UDamSu3>tmbsF;o97 z)l^x{y|yUwV;1)MS*ngNuUQihRTUrlFvbKfOUeg;YlAWy@h|pug91)mvI|~hpRFIV zCfcl*!c-Z4b71eVE9(ZE1bnAk`zl3N37k74QP5>s z5Xp|@uu)Ek%EXF!*@|}o9o0&R*nG077tsq9EI=Qp7iY~dACC0TmpJ=D5FX)Uef7PO z=qaZpimILzzLgeiPk{g^#nw2Jr zLa-m1*;$k0Wa?>E6(qrVXZYd?+B1a#P$n?)#wGnDi*)SW`!u;PX`#| 
zK3(Az1_EU^*k5*`3FutZHsoaHRo`E#Pgm;eHDS}A%O7~O@{47#Yl16zh&jU8F*e*g zxMtwFTVHU5pT0Lz!ascC``xY!kjLIR2f`zlC{o2-qBq6eba!!Bq~3!*-14A8!s*Asd%DNp zatD7;)=(PORn@41Ul-i>U@3X$ghCMhi=2d(UO*7nyX(W4HKv1}bXRV)gkE^A*&2e} z?hBd&+C&Wv3yDj5uJ74>q-sYB;FKKDC|-`U2nfJ3M(VTH<0V$b_Earqkr?;TADAO%$ST}ec+7y&G0G+cg zE1vS5qBC5qa{zSU?9vGt>c@Mo!k#&Q%37)ecA@%(QTG%#ia@kpw z#3nsop(n|luu6+BP2G3Whv=C_6_LhFXJ zQ`^e%aQ)r8Lm?wWhw~pTZ(#e4%aD^PgV96wQhm@N%OeeewIn)hsi)v)OdsFb@Y4K| z`>#9@Z@|9)Y&~;(X_k{>k9qK9nN4hE8ef*6O{2@n`lH$ZVc%5Gp8fw~U!`YOuoZr= zqS@<;uv?FHvQ<)T(`$UOWCDV${77lY`QnS@pz!NdU`dW#75mw28}T(MLxhR>igrKl z9h~?_jlqUs!P^ZOjMds$acpX|K+W5{X)0X7>p)+NN*nOaqWyG(wCH zr>ZX+F(J>wdWQmYWTxsbG+MRP;@-waz;8Z#*17RCd@An2WyJU6DdP*Y;NXhJ(`rlM znIKsn$3O?Cj-vPIsiP)M@n(R1r~R4be2NWd`^r(nO=3cqPHGzUG`cfe4>_2XAI=wl z&_}RXTvrM1Xt~uW94W`>572)(nT*}b;^@YejQXoM0-64=$x2Wi{&r1ZMXP2*yfAyG<}g$~+?8WYw$WuF=)%oHZU<0VxVVIvr!tMbDtQtY z{^#QZ(3BUw6HA8^FU0cUr&$>B;Wd?a41FvHSX+&4bOv8#UJr+_D8iJc8`UFfkrydX zM@vI_Ta3NZZni??Y5yE1pUCm3y>6#_KlwZiE=z1_W)`{0KN!Dxh&{I^Kd$Xv`9wr! zd%&+(WoRdj9Z+(^-OAN=uzO}&mK>g@1Xy#aoYIk5a*P!JD@ zoAQBe*{SQ|EAo^PH^!mY+$1K&b(qgZe*#o9vQ0dwV{n=cKB(yYZCvmfvn}{uFML4J zrEbB?H29e4{~HZmrAQ~y@AYpbjQv?qM}Q;(`**L_FLZ^-YR9wu*JeZq`39Ge<%CM$iN3Rdt($UQE@3A3_I%WlE%VsKqoa6PMM#B>;s>+J>( z-NUpaRknWP$@9>Xg+AI@pfPcMtsk3z%GaJIdggXMz=XW36=8E@TxvNBRtNv~a+~2i zcDoOMKUnh2nN!D^ycEO2CE;Uh>tW$XACNbv8gM@XZAf8^?PrsBEJKjb0@ARAe{+EW zz2oWbjl@Tu5%=FzOhHteC`PXB7~cTcLr%vfrE@4hX|D2w#usXv!kt$5i_~o!%|hMF z*uD0{wlUq4BbA2pZ%-GAB_80Rc@0XsmJ`L`L0yR=&>sCNlS$n&_wQY4;=S? 
z_3Gq-8LM5o*tJ8+sKC&hla4#pry^0S*D?|I+x;)-MZZ2PQP_x|dhYorwBM86?!e%p z%)K$i=lnA~2pa=664*}O_dxLF(-rEtrPXg;+Z$be)z_9jpz33Vg>j_oj@-5DGEj!Q zJPNdjFHa!46Y|)WKYLH2F0v1$3>|mG4t=d}4UfCo1t#E~4jo`;L-i&MPB)c9gDxe9 z=NiO--1@VXiwhr6neTLI+!B3TcEBj0@!Rt-#$kbTs9WA0=v4&GyQ9Ox?k7r^{y12T z<^Z-CqQADODJB$<2;0p6$vWgF52gfDoC4j*6XkfFXrsQUjDZ}e5B)>us>=zWI!wF5 zE351Cqsil-4)N_eo;LNg_%%T@B}>a{at4W?%fT7~!1@3;FHeGB>uBW#4hk4;|A_l^ z6z3N}sEj-Z`EYngtx{(qe7P@LmYJfPL+UjpY{4Hv&+btvo@BCVip?XO`98DGx`FvF zFge>xbUwD+o28u3PJXtytYv*7>OO5*#OWjYD6K>yztec>) zh$j(?;ZwF)(;K-e=b%H{Hm=dZvIK}>_-3ysG^Yc3zUz7N#h71^&uvmn=KS03Jy>ws zp5^5AlOImAH4ucvx-3$Ie_iQ*GCy~H!tZUVtvg5$WVkc8&@8MRdj`9B&Nt<=nQ(Xw z`)sXNPi#p&_UFKfrcnHa+8@6=bxFm=FulFrv@;E&w77xUzTVwoqO?rG)!7ienK%DG zJOxaQc%O8X7@exshT(jXwl$E^M^IL5PuZB$H4;+MbCBKSjiyKzPBuj5FiCnh_q_i0 zz*H+w$Jifh;Cd5e#(~&q?bZd}dZ-}_4KeMV!G3`X0?Qe3^rg#VIl|J`qAy*3E_ZkJ z6i242s46G?U~9A1f79xAtzpRdvE%(IGqcwbdJP__r8IcJTKC7B#7Uk9Z}^ZWhybtY za^zVbyvl1hE4^&v!Lt|@>NIh$T(1P7pV@BhiR`!cvww@HyTp#H(GHZmN0?BITkP=` z!7k2Rew20GGM_ak*WsM@;1jwZgRE#5Mn^<#aUg=ZqBI_ALF}NmpuJx4xXGjT3kTGl z7Ya!StCgA!8$*+2v0S`06c4sf72Br1XG7sOx{{cJBKY|aWuKEADKEB*O*-&|2gF9} zUV0gKd>ll%`}F#xR*G@*fx~N!;~!2^9ZY2gnyOd}vbBnfY_Mp8TQ{t{Gp`K()W@FL z606n0$-$2w+SPda`yZ;sF8R91=N_3s z`$iT1vkRO``@ma#@?`ievv(;fa2;3t*A6B9&!xNuUO}uA?fmym_nrhB2qW68eS1}Q z(qA_85w6VMr4;r%@~})4dwRZO>3_B_KKq-0*2Ryc8C3H2T~24Wvxrb2IO&W8X4d6>URBp+Nszpzy5YPuJRJbiY4GJSJzrgE)S zL`^5zwU(wkh7$H-)8#8-=Ws9zv6H8zc(anUW5$;C@#pI?>y8U;VS$jI+)5^bKD2=Qz0zR4T<&pbwg`@ivopkqC{Rv|5d$PGrO3#oVwl;GlW$6-OjB4qE4JBzMZ-?K#sWgg3XV~^=TxkVm(gIT>_@h3UhrdX1 zXE`WKN%fs&J}dvT-O#PQHvZblL?RrlUGgi-goo19zM_8PAW@`$zj)TG zb&C$>nvM`$pNFOwll+hvDX%1pz!)j-XlqQmc_4_GR6C^q%cKVEXx~J<6z@+{;l;pf z?+Ya@ajS(w-ja>-=9rHWFzrKuNPk(TKEPj7MHO<2L0-3F#%F^UE#4{im@lEAqqrD8T0&W z+Zd-HOVyO!9<|X!eT8Ve)V!B0Y*$`N z31|=&<~-6-janK;M#TWFkgdr%00uuJEdx4W3@8Jyl%sZ-XB1;?@$#_N+R_M8qE}zc<<(iIAC!cXUeZ#Mj zU`aC4){Qb5x6wc270hvM=lI&s{EY7hNE?)W%%YzDP4{f(+or1-end1?!{OstYw;(> 
zM`IXk$bmUN#ss5PJUK$pgx5*Cr+Qmd>T)%ToaC2Nruzoulr$RO1<}nHDA;2V2LSex00c05BNRx{4*IWn_@O9R*1%GoUh8p_rnNu(zMZJUUfEQY%V-G6 ztoNww>jDEdp4SEOWC?toW%Q-i_DGZQhf2NeaW`^$FXv1&M!N&1fK0#}9LZFgEcDjr zeK|_xcTzcO^J%zy2*`{s1sId`xHjem&A>6g#~eOA1Xk!`fl+U}uace5iuUyF`UvqUXqhxG%h&c}NkER=R{j|a^{YOZBqLfYu122^}0i;i%9 z*T6PBZ#b_#4RY~b0L|~s&*{z@e^pp4w@H$i-y7rVKSoRb9vunxt~#MPo~~(zq(HR= zIv7ftm?s-POH|m}3>IFLpWD~ZKWzVMBkkKxUooB?JjrS&7(Nkqt&^dM^(&u){cfka z3@i1&s>k1a?P@%p+&G1g{>sxjQGbnk$M|Mb&$QTYZhe8A-*0;jF} zmYDbG7om-~f3m+|HZ8{g+9LuB(Kqu;q?nRpy~V{9HA;a$?R`FaokvW_&Zk9|FhT4m zS@?CDB+~zmk!)OZ#nHEdw0m$4=zQ>_i4ThpaFxkvDE<`d;kIqXfY6e8=)^BBOH(zh zlvRRcI|ln-x`8p$0f~;rEuqq9j!(BLdQT$7TQ|^0v0Tko%r}71r7E968*^0n*V+$p zK?cnC;eAJUgBmV~qLnADkZ{X>0br@@0g(Gw6QaLzA75rZK=UqxVNrV+a^~j)B!^gfTo?o#Ev1p_olQ7pHJ9^_!W) ztnUT~0YmpKX(0w?+NoTQD=M3n{JaeRyV?V9lo8T@dn790W3fZ=eMgUOtZ$+lh7vXD)zom%}Oh=tb z-eY!VP#~^t;N>sf!gQ)O1MtqswpP46ot_;rsd;bOr=+B$Z%)`lwrY25hF*wZ544)I zO$~|$8mon$)hNg10h`Olsz<+pr>$2nHguHvI6HRLx7ZtVr_Cd(j>H(d6(GL1F%FtV zw%OhLOuIMNH0h#NC{Ar}J5y&FNPu*{ep_HQ&VWm*RCNU!+yh?Z074sbMdKne9i-e zwbD{X&C!!=}-=^Ru2c`1>5d^dR6EJ*PvP_J6c*Q zAYeuBiun|-M2>UvRPslA9=RqWn<{+{MNU+g2C|eLfw9`9ISce_%|PV;==xVMXq1Ea z2#S_Q;5i|#$7IE(2{68K9h9eF*DBQK*RNp*UOrx$qfNa6V-i2{ z=JceL0+OkH%0rhL=k=jqgcsCIF||2kZAz4k51yqBNhEaZ`ovXiPE0B&$H&EYrg|Qk z4zHIF@K)EtQx>asZ{%tC)_ERoRz{}%ym7l|o{d>I@$+YsJq~49;S8`gaK}Q%Xv3fF z@M)V}rIYN;>z$#XwAjdr(M5yMtQRvcVJn03C)hUqOyez@(<;Txh5l&wOp{(Eg$=g5 z%2wbP^xk@R*F`mtw4cEl@VP`zIyrC0ANX0_&A+&JqjCGw`#+Ck1ieD@>DJnVU84%E zzck^We>dm?MuZ}TlbN4eh`;C8%I{0IGYc#Fdr`xNbm>N6)2N|5co z7bU?q@muit31J6jZg~7;o6(;-Yu)Vac?dXqYF_uQ-D2d3mUgjj&5_QX!~l^oj~VQJ zrR&No@r8hZ$}hgNsbnQpDKOz4pEXw7e;vmyi1s*lnXs-JOe@H@5zi z>KX8oz-6=b+FrZJqL`LV^TlA+`p$D6N)r>a@i!#NmDaJqmV!3qc!y1#@lWffcucaE z;l6Qe^TuPRrQhX@6}Lr>?{)*~uk05xyy-9*)FQyS1|a4!XHgGj8`i|6bq;UwDw&e8 zd#-ZaC{nLnBmVsYbHB97d8IjV;OjOiBaIw;yO7IekH#L?Rosm0e+YPv-FQdrMA#v! zLrSh@V$P?A7O!DzKwXR;{M&21I&xDERYrQkXZgX>Gp?*R4d|82^b^avnd~MpHOfAg zu@7EPn5srW>t|cDeU9bQykZ6VLh#Zn`9)6RClME&YV}z*ud`^G`?mST4j!%uLUnQ? 
z9@tP8t@)hry|^A?9{j|gm2vTO(I|WRV~ohV+D*FOf9jB`w@e7aXT5|+Tmghs(U;q02N3va2e5%Ae7lp41!r}`f2RKQH=r418Rzs2 z_{_0b&Nc+3cJE|jzTej?llMSxv4O&Tezre)wbtzWycL! zm%3b0B##*rpwvM}@@Kh#<~Ujo=%IByf>KQp{&>HSHii{0xA35FY_Kq>MO?MxW(~0BElKaXJ#F6kN79VLko#w}F{i;8(odcUcy~-bwg|B! z2k7%Q%{Tz^ea4o9oBkj6-YTxjb#32f&>^KDAl)EHDkxnF2nfQIl#-T`?m->OM$n!ubrdPfU8qF`H*M|;zB93yle+;eY>KzGbh+;5V0`lix;XUl~eS^7vh zE(R$QsitW*r#gKBv^g{v*RJGe=<7d`@xEP}z{arNSQazmJ2}?#$}hnW?h~Oy#zrZh zYAf(V$9PWFFniSKY+e&flSAQVDlf>Z#|$qR(J~?u?rpQ8>`(7T*k8k$7{k9SIe1p+ zYhxS~W*>u{?26Kr9Jfx^v5IGQ?n-7;TqICVo<|%vp6Cc4?`erIhz_`gUQbVO`q35Q z%YSgX`SDQ&Z74s9eBU|m`Cvaj&nN2H)A7uRn89-rvj<8)wej&;;2<_iB7lVdKss?L z_;eESy_N2LmOr&To9Z_6PBVX^?R%#dA{-+WFX6o(D-n>cx=?eOFE5P0xnZo4(+!&O z4-|kx0*JDjd+3f%8ZW$Rd1ZjS1l$i+Vr?FNuO6}X?#_5Uw@n?xUW589$6uxsLgx3x z1VPTs9RqTL*YSng(l9}?A}W)|in-(@B(u*dz75+>+*4vi10KJt`b7&cja^bp6Gimw zB@=x~b7m=)Cf<`(jNws3WN{vB03}(;B$IfyNb7$W6HWQ}qCn5R8a$%!gtULCV;zz9 z$?`R&;f=-?OL8NTn@E=Lqc zF$(7?F)LAlbNe>kE9<7Tuux$jHYhg5*m^99?uAP}O3*xP8{mnHIm33PY;ht}OFIgt zB*egGy!K<4plpQ6 z>Pog27~*N2rNd`Q`;Qwm(W$<;)FSVyDOD)pV+#C5 z)Kbsdzt-bKn2qlz_P>{97yUB3uv)75M}nYRTB_}f({GAg*hcdYFw-~!6Y&w_Ze{4r zZ9h#i4KMtWVMHWXBZ+p5K|b=MQg1xj8;3}9tn7*Pa7P6~G*i#{Kj&9TlT@jmNX9p7CyKTk4)Ku(RTWWC7ed#^)>9VQmxzAB75S z_dZP>6%{nPoDnq=kGPDc?7{Wi3fo)r5`OZ0>9>0oD)C{a?e(6N-Y1G>Et|EbE9jxH zQ4HAXr(Of$nASAraiQbw&&!n4q5O5yAH6oUk^UvxOMW5*KQ>Cvb}yq2U|jw8haMG+ z(jM3fBGXkzQEN0oifw$* zVf^NEE%aA$q|XU)(c;c0`7(cb4I&z`ZxUz56UbCotCng4imbA{NfcBQo{;y^7UDNq zvZ?kys5zJ;eMdrG;iPvO2}x=aJ_{LZ4duF}UwHUB${+B|(3&VKo3`FBht@Q;2J*}I(dN{OG|BA ziTx+EhyX{LlGlV)Y=%@Q{Y?%UanCSv# zuBG21@Hk++eAyY#J3*gz`n%tkrdMeILlNMgIrW-t?ii`dYW= z5thpjwfEu8~d;KKMS_G4ZD z({lMO`QHB!q<#{jD)MJa+uz>ac^zT=%>c{21S74_PyEX5jMk6zb1gr@+_ogk77Gn( zjlG38yu=n^W;?IE-6I^`@9V@4_>gRWMy%7}7xX4?{m4(>jamV*HYHTn_-%Ut>UW@t zMW|4&KgrorxUhQCZ?P-ADw^f7BDffNC85IbZTA|tz`{(}mGeU;48r*fttjdcYrVH= z4tc-55#TYQ{!0^a>@Xv3HC2;eW;J23#H;9BXG>XG~77S-KU4hltA>xAkyAWitQfcjGRkyubV)8;o?_{8tH@r|>+lQ&jHuVPFf zYf*d==*^M*kx;&Fw(Z^lw#|pY3FtGp5cFCr6F4Dx3~HDX*=#{(BP7l&s0Z98kJ&-L 
z4lEd#H~Dwx1IU{}r`}5gGpGa)2lH8TFa0gN_r{gZ9qDX%tWG}tXPg3y9bAWPbDWZ) z`*=b3gVKVdE>ywk{?{9=*~*(GuUSBTm9NEU>pYP6bT`6}wpE)zOLJOj*nW~bCPz7D z=3SGS|n7z7jW1mOBD6>-$w{H@qCv`w%b-p zqW_;&W+1AVNao>)4VPq~-OFiDZ%~d>op|>US=6nr>(P{na69t2hmXHw0a<(g_4sZG z`^FJVoTwAgi`!Yyfneh*_F=p)w^CErOVb$a_eOiewk6NIMMPsD1xg&Z9;0e{P&_9K z8|#EvPH#%g;q=LCat2^Wu!fTNv=Us4lk7^krx%L_l(W4MG8pgo*YL5&1paC;yPa!Bp;i6^_Ap6$Rli-2PTMPQjACP^D}7d#;l_PTKXZ^)!+my04TTBCsP87GHc@Xw8l!ktkvZ zu!;ZeE@7z&z#C&#MVJ<^5i2P!!ng!RN5&7~W}w-h4mJLNFP)%`JXJ54wYH#hvhJHK zgfMOS(Jkcnk`FBk)_giL<6N0x=INH<*GF<`JNp~&@BUcRbo4_?S zihaB-ozT|J8#xRj_O3Cf4Vw_Knml}_9&J@vYjffda)6lfZ5kdad19T|_##UGnZy1l z-UM6PTG2r(Vz|<4OM**+Vdm?1CEMT5O*J)KAV>I1lJLv~+Q6o!?YQ(_Jfrx(6MQ zSyB{Sv42#AFTD>F$xIwZAHTOnys;4pGC7oYw^{Jb%-D_t@4%lon=v3`yYF=3kbngb z_Fo?$u!pBMFrVc-QQT6*$?9ELdVjbeD`2z+zbEu1rY+K!OW+9XUbn+9ME~o~g^$0$ z3eGP4pNb5~H&>5mQol6j(lQxM2wFWXv| zFB)*2)B%>>e>~#CZx8-Dm4945q;h-?8wQf42|LXF+s9gBbR@SQ{YnFtLMvMs;c?NS zeXK-Y$X7cz9yz!cNTd6UsK=4TJ>RrvWlRzy0cIJzPUTpfs=WlH6fVkCm7d z$3rXPTB7IHyQ*iT$PdT*w5PuoSVC$*7dbu|&$P7LjK0kAYa&<-+Amne2RoYtM8 z(xXq?FS71!sOi3;==@mv{pZge=kJ>+D6HTGhK+U?4k+LNa9Hins*!ACp@a&;ts-{+ z5{1#OAHffme3Pr~wCi?a7v4^D9h)E@ZYE#|nEw*17G9AIjUBS<|CBJle0Cn;Xy40H z?Xdjb-sjR>@Fz8bJ#A}*86W>L6AKJ+T;KGe5_PRxpdcG#xGIQ4qVP7Cl3mceC(c^f z?xDGPh0S>{C*H(k56bobnw|q_j@9!Jn$t+yDBZoCA&Z}hK(n(qcy}HJ( z+Sxtfc1tNfzvx#Bpw?uSl>vN$HY*fAb#2fL4BK+@aK->Fdz-<4rt71EZmp+JZvu<= zJ7A(x&~zOQLDs?ez{?hZey$z4B;S~E3QWR%l0V>9EFx9HX)JdyW%nX>-%k5y%?Cdll}CK!rg2mIp`ycZ7<}Hk4FIA0INxa`>5qgR?YGFPJ^q)AfErck5?C1)zyB!p z-r-l?3t_vd;zHH*uq%{z!J4$U#2ee&c%%*$;ns)kV{gO=`TXEM{?%ESXMYx~Qb8m< z+sYk+f%9d<5eJtjQRiNX?a3q2y$rW>I3(}qq9eN=KkB%h|Fr(cE1dg3+%OuZeGtka zQ(lKbKG{Gj1v1beBk<1ti5a2#&GbOW3Uc?KS^u-BAxA=1G_wh|vrM-cxQiDpM~(2; z28@tQjT|1wo!uvijYd&`gffYbt{qo2g_*oZ!4v(yC3u1gZ`=0Fp;iojW18LgJRr!m z9rGn7E-nd8*5^rSV5+rA;2-W{q-qcg=anmntI;tA(%8@4pNe=?30A1k+tX8iU?lh; zl+f7lt<7ZJ&?~}E1f^_6umzw)Cxc{8GJJpIPr`rKk-=ftHT~1XuU{{?xbU4zUHgw` z!rF30M8meDETEAZnU`mCy!|nPm0h@(M`N_yqEP?5FI(SB)C5h{w#w_me`C5-b-c{h 
z!p8@Ni~$|o4vTbOo_yW%isYNuuai%p3Z|;egyDniic%!n4bQStqxu?Tai}C5%ft%c zH#=w7f678^B4+t!EoP|yd(a~M-1&a$k@RKO@qay}qq1}GZoyj857ctM_#@rbTY#E; zaer*j@pOT+#fl4;p{dUCphi3x_x)EibK<6u`m1UK;y_ji0kN9?K_Van(7jm5Yj8^(xwl&U;doNum9VOYH%ko&2eALsDhI-i&c!k$bTg{_^`n zIf?xc`%IO|j4{d)Y$J6u7mAyRL}lqY!k~Y>j=-~cgcbb~jhw6}O`Hp;Sy>*6!B0oe zO%6ac0E1~P5Qi9;>YUwy!f~?3MJY|fXDu0QTt84iTMKD$Ll2AL%jegs-4XAf*3m7E zznM^Xa!GH?LNLYCzVJ4S7xQ}266=L5^-RJC`xZbF06Sd}*p~l^B={OcK(VouRJKBS zjyu!fQwtRUrfS`Kr*VG83TeSOR1Y~vkHiD{=t=dW87W}R;Xcinv|+IEplTr@B08baFgKbEr^c7tUb zcrlClFM|yTT=YdN_-VbDAqq*)AntYGG50eH7j*caeIMNcwkJps^NwBBdI7Snw-bD4 z%@VL|Q#)~QO|IPP0J4KT`=T9vAgw{+5Z!o|zWril`>`H-T92Ptmf51Z=ClUn4eb22 z6XPy|DdZ=I=7j*-1h6ijt$UAhIQGpl_KzHGf5Ux+-wd*h|Hv?3FBiWRoc1`XS$I3n z?D6qXp=aex;Q23^YS9(n>zsIDu_CkXvlsUlHj-Y%gmyjspFW^%;ZiY|;cHAjL78bAC;rNmnC)dE?3J1`n<%u|+@I4vw8#-*m9 zq=mYwg=B%(pyny4bld{*KY*=#cRc;Z>0#a=dYxOBNwIw@f9qTBfNNL_P)Z}X1ArP> z2WGU@fZ^JX@wT(Wt9|#~7sN?Qir84SbwujV$kb#DgF+=xsRuwXRUlS;KD|69ug)~H zS$~3<=a_2=$NKGd`_HHq;A3y+kN^2M^floYIjhobm*1XcCu2NVgb_@wN zoz_U(7$|x=#TRHAB>+4I3I#q+;qD;ykPtDv~7Z)yKYv-nafqUdSoPb1+u+0T4O1!%>z4?86>gF3Ye6+z+( zq~kN#@g$T*{7g0*RS&7|Jr6%?dEdZKK5sghfWJQBoAFS@W%F6Qa>gVm+X*=CQ$l{W z)chBSAGh>`#NCy9wu5&A+4Ae^;ZH>-H<&H?mNcv2c(7s`E157!0G$?pYykfx5(=FA6pkon? 
z97fp9!EcfgS8+xYisv_`@klV#)z$NH(PXKCC{3KSA{mX`7DVb}_}Ic*lQB19ax#jv z!PCVIAP1va*GV}0&O3Bx5VeQ-BvHu}23MPP@c(VK`nysH?YNg>zQ1ztQG+zLk2l#B z9jp=;$jQdv5N}QUS_ZmoSVg8AGSK_(=B7IEIUnJQzJ|dhh&ZQKd-t;lDV97KaWf%U`#8*(F{z|EXrLm~U4&#AM zTd2fhvV`#k))9k2Y({#v>V4L%;Z_x@k5D-HPjTp_*F6A=2QSMF|5`XpEo#IBE|4Q2 zGWar*S1O1`vSLztUkpXsNu&vcA)T6eWhoL~Jw4o`pu#BrL*oXWcy5I>yO8orVpS(U zj$)SySJ7mc#GwBe5ebYh28LkJ4r1^_3fYd`1oF@l)sUvHP@D+5iPk>~w5!KA%h6_h z<|nE=UQdx-m5AMg@C1O3_aBMoV>J*Z@cz%_8ZVep4n3psXaa4zPM$C94LLawZ0$38 zKh>vjKVKLS>s{-pSF6m`=;Y8HPIbsw^xYM&=oCnE2L)c4|F$x_NgFTEtw#$wMnG%x z*hge%TNuIOW)pxH-DOc<^PBg-u!t(P{u*TX^WQccfZ6z$a0^5f0EP1x4yTwOf?MoJ z3>VIL*jRA->C4-(%L`fl;f01U5P&#K6@@I7<4@a zT26<|BTe#JKMrwmNR51kn}-&~qIHsY`MOfQL`70aRECe>4CZt)>}&6!%7MvfB&6YU zgd9CN*`ss`+b8!h4FM-&d)EJHBYX<7`hW()P#8W7CDgJ2PWPhbd7|?W@YlZUC?BS zdxA~qg`AgC98WyhvPM;?ezL9U)j22b-17bj{I{Hyk;wwsq4@YAM6@N18S_h_w7wf8I?1=i3#}*xPI-UH@k5+{K0`4xYDX7!_ z8-WwV`5$v{XHRwxt4xc%l2I4j^^Ky|;%x`q67NRqK&Fz6J}%i{tyzcgMFGb_%0!^S zn*2geM)za{Ds8cfi)*kcCvl6b4=fBOmLfR`K@L_f0|RN(A)qc&83rh)^DBW((&m0} zvuudsmV}QP6t6Jfnvz(AeQhpV70llv-l{XX{|eYIl^hgvL9(g6Q4o;@hPwwOI>AD?~=X2Ip$^4IAaeXn}ULbU={$&V1 z$Hd(o-kU7S_ED(0xXD7*h}T|bFB^UII@0+xCsO(JCJWbi5X->`WA3Q{i>fk%4?jB# zfo|}EKVF4-C9dLXx{M;Zd?lak1IOLfD{VE?0XTgIvandT{=zm9}=FfANd!)Hgs#Fa;F35{H57oav1VyUDBf^df>6hB9qp+3(=VKb@e^d;VsR=M!jYgvIW_wTCByd#Siz0as) zd+G!1{KaLFc{UaS_Io0GqSxAduSJVb$&?H%h?;fKZ;(b;&xDxv;jndQY608#*_5on z!bfl``v=GFr1%Es`VQvINzut*jmuF>vb{8|+KD(8<9>GK$Xek+()`EVIZrlU+UWZ1 zQ@a5=lrJAEi(c(Z)5l*TZ8mLC7AU`2-`ZRpnePqb0wiK~WG^S=W%X@i?;UY{&&=$JF;e$G z7RC>#S?}CxL9snb+x?nGbQ!{3W|-2Nz;gY!M2xc0a97?o9yxZ6)nrzLGu>Gmzm*nj zlf4NUN1DcW72q#(PtRHeafaP)``0M8Yy|#H4>AqFt0-Iu!g${G;FAFC^!l}?8G+_w znQPzDg^nL2F!=OqF^&!2VdUYQ%ToKaMeooR?-bQ!7er~ zSal1svJbSvK?e+tNYLo$u}F61cWT~BimnV1mR1`b64iJRo@``+7k)#Zk>2|#3h>s7 zc(g|n@wIFCOjlm4x1QsU%EPb?7r(W;W(NRc3)RhQ;aiCwzB{{jxa7JLMQ2_SM0`na z|E8#6dIY3!cdJ~M(}1_j+k0;2DJll{RsnR(pCstO#;$}r+Y*k^bt7T>zCv(9h5;6O zaxa=O8cgi8_G(KZ2|dXIv!pp!-fh?!zHHByEZ~|%ZXoQ&yYYG7H#vsT;xVpXNGtqE 
zG@^q^m~&haGUJzhKVv8(urdF_fV-v5jr` zl?XzMlCOyA+lUOiP^vpianAcu zRU-){-vIi73}&+QCCLI>G=jsH`Rtr~YI=X6G7P2k2rW9C5iLR)%T|0k*lF$IXxUz(M0jo0OX6 zfOU?&uEy+@AS3W+w8_vmFhr2-8Psuk5sYrm@e(V@Ri`{aW|yy>rsQ!4J8g04^@m)< zFWP16=5X;7dLmX&aI4mZ7zPcernEAv2Ttw~O2qy=Od?*DVE80&WH3V1=jdK|kZ_2* zN!m6t6rts_75BXu_5nMNdDH|<|1o@#XIr~9RcA*vVmQHr3R>4%W(L>kTXYC;SszUM zfNZv`D@D8&I}*3wZF8I%ASR+gRiwR$l-aDU&)Ai-#b5S`>nb|k@?B7>t(`%C%fDx| zS%)=TX@4QjMW&o`TPHZVghLntC}_cyXc+irA4gi^EY7y%VT&Elfl_5f0}%O**I&a< zEU|d*LvojaBSV7wHBhk~08B|Yqz^TUBnO<;%WDEK<~$yEp9Q^)?!e&P_ySU2B5A(x zj0vAt0fr4r*ofN|`PmCk0ss+emBPcx04FtK0jU6tIRtvt?D5s0FrXTPoD6TWuo1FuKC~1SeUy+01 zDBt!sU*Z>lhZl->9v6c4B<2a+pT(op%1(L?Ku&>ZNfzW6urPFxL6i%pg9eP}?1PKp zd_B;k6EVuZ_GeY|5?7GsPQU(y-M;Os%JL1<=4^rz&xyy?rNzpoR$NhLg%1wCZe^PP zJcNExOm8<(OlLd)_*54KEF%!T*2QFs{nkxxep|b)cQ}We30p&>Ka4>f6A8|r&)o87 zX;RV+QH+<@^X490w8yZQ%a<{1tPbR8N`1t#rd*Gpmn?viE-2H&8Dbh59ZJ5QFy}bq z^T#!b3@me~-f7;^x~^>>n=z~Z0Gws?W^^z`KY;$+t_(-15w&W6@H1I=LW=fzx{L+J zY1F|%{n(1y?u<71Fe3nfqaRbO`E3D>jSw*=?hzy;j)=0jXug?jaX5YEc+|#Ud(5=^{z`wJ zgxZY1?o6P3ge@)P=ZeVB{ItzPMmEh26-fS4X+VztUmkh$;g$3?JXoN?XpF^J9zMl> zf1XI8{pC&0WM{dD>~s|pGj+Vu4vUN$TaP)D-C0gwlBSFCtSIdq1q%6cc7#t|$cXem zy!pk(cl`_T!K~Orn-h7PdyU?2n7!YWJ-QHMF=)%b%Q4TD6MBUxY!;7IoRIb6fhlV1 zt6>F97clR#?M|yq;-L~8*NELHnj?UPLk5+DZo{fJ)a|OLrFQksoBdA!o;EWhL#<$@ zij!S6H412c6@Y-(M;GH|aJOEC;_K^@3W`>Ve}vQ8e-DIkOY)D{T-NRA2|W%_Dq!WU zB1;x)pVC-31bBR(fEM8UW97W3qpm&x!Ov===3l-0lBHK^y84q@1*C1yQ0lhyjA8N` zA0CN~M}PRLs;1^%`p=pTsDHS>21N?_A;N1JXZyEtt~SQ1++`7Gr+=4XoXlFiECc-h zlE*FV`ip^EPONT6nyYsf6M)Eqn|1zhL|nbh^b~d?M{=}HCvg%>Li1&&E^c^O-{!4e z&XWYs+P3tD4W8UH%>>sS=ctd!olL+bXxUPMd)XM&9_lcq5}3D%rn9!4%$O@IWw;DS z)O4`X!~&80Vy&I*;>5S+=waV?`$?X41uFYEs(hR`YULsSOEq(aVkh0lg+qF`D6k8w zNQ9EY&wO|4$}Z*(v%`1T-*pRfLz)~!@ys389cj?&6)^sFJL>&Ki>hif=ZP?r;0&Sf z7`7!TR*<+)iRoDd?e4Nir6h>_Pq#_gF+0x7A3Yl-*g=&`MSiluq zWNUX~fh6PLMf@Em|VMfYR3qC+0`Y~R{dk@2-k719*I z&VRQLvu5!cHQ$4$0vY==^b6&(aNP%kA0% z772G|%%D`z!|lpaa;=tr{=6pjJygSQ@8&A<7hOqjvSR!YZU 
z#NY$E(fzgdl2mssLje>2q8oW8*g)_Uig_G#f7lI3scH*l4g`yv&+^8cvew%8tDTRNQqBCvupmX> zT9U=mkv|#P_0B$HW}Bi{H4F+XS8x{S#j|C8P|g59wE9!^nuj5#=_5V+6Ju#%-V|P1R5OCE!gk=x5e_ZaWL(l;b(ZtNFt>s^3Ls}{SQ+hnMc9Z^(mMuTqr_d={EN;WpT&U+ z*4+tW(RSV$duGU(l8CJ%E2PW#LAFQE10V^wWQ^Fp#;1C9@}5ts10KK!a*jy)19&oD z8|DRu@>mp`AMr`U#$+q@fp$%#n%Kw!EsrniLv8MtECe;mPv=V3HKI``1@1)lFtfc4 zkMqS^ZlF8sWzY6qkwhIA`kqR|&!oMMg{&g)@syX|23!#(@Qkn@51r&MmLkoBc;zs= zw2J~{+<T0Hjyn8DOEIysqlLIq71=!LXf*iVb|)gAT{p7cgv=Pji*p=TpNR9>8wgjM(98huu|1WyT&_+ z{3gST7jZHP*|_-MuYF!{n-YSin!UVDfQNxpS7V6V&JLIVnoN452xu9ZUJ@s>u3-md zc>gk{2IHq;HiMSJ0&-MSMh@s{9Y)jCk{xT=!Il`ueRON3R@lkryX!w9+akOzsU^Bi zPC^`o(sL*5B3EqERtB>Wejkdx@=5%t2qaUrs%U9O4jP5>3ogn>Fj^}Na(FGT1>g#s zvDk-a_{}T<*)-^9x|HI7EhXEvF(NL8$$b4hWNq6QF^1!gj%iMwnax-sZ!R5?+AQ|JndX2@oss3#ib~wQ>{mJUR-?#>d|iO50PJ2RUmv$W={J-jmch1OqyU zAQ*Gdqc(d5YuAyX4<3VVC3Xj0H5fQMbQpMMIj@2|2JUWx85FXlXLxwO&+w3u!N>|0 zM+4v;aSE*xINIZW&VCGZ@a_m!(rFR3>k@T1y4M%WY%{swW@p}(dh7l0Xukg%GV802 z!dH?OBBr^pOwhAGT!*Ub?%jbQCgMVv?%k!WG_elzAleOxG~y5@yP%F*ZM=+|2*E^1 zDDQfov2A{-4*rcAxWtS=>WaQxQ320peyVGHAD3vOeqw|FmUW5#U6d2?Nlw%!h!*@2 zgKnLfA|?9!>-g7QM_y4KVB$tI-)3W2-iG>#3Uvw)h01bp)am@C-S{imcgSn~K__0v ztNP`*p6-))Vbsat<{({PfBES&>1nd46+h};&}TXZR09Vv z>Vw+!7^oGu7uv6JrHOkXyM$BaaKaBlh(VqcwL-E5l&6W+*h9VO%S-v9t}QRXfzuXk z9E~67?H(zqmhfV zta6YA-HipHBW+#mj9(ltw}vuM&@f9i5yu8%n25Ir$-?%i?8ffNY9FNv`YHyW9GYl2 zY{0EU`ZWOi5>-w6u+h7JU$^UcB+de~x$R9<@@9Q`*90t=9Ol0~PwOqMu&WE6p(kpz zSD%`bAWDE|7Klxc%(NEyN+IqA?S-5?>l|#j@Tk^Ro@$$4!m22sYJEQOX3+J_Qa?kd zz=6hZ{pF?UN24%9jrm#nnxDLg0g$y|FXJ-FzbS|W8O#tK0q*RMTyTGhBhaIksOmKh zP|gA?K{P0O1BJBkSgBhhsC9nk>Gj%ba(70Gwt&T z|HtOLuYS!jtbWih6gqal-qC*G#G54QSPLcq^3poyv@O$mw!2LQgD~4xh>+|}3o|3} zL&ZhL=4&Lc&?M}|2;Wx_t?%t6&+kjGv)B(lY9UfYG>&I869Q^8Frr!O9EzhN|7cL? 
z$1b?z(|QsVEpDS!l?Ovu(xUJUL+Y%SQam35*FNjaKBH6H1C1Hu6A7Q$nsn4NgDLo& zLoCpK+0d@I{m3AU=64VD{#jfJh-13=hygU^3l)(LkKXnXd+)4l!xh=!>gi9!2$YY| z!u{<8wWpF0t|Bzx>*jP~$;ArZ@j4EDNBWFcp5r9lmv?@@Qw zh1#I%o5(B0n4FHjIp#bxzgAhMwh-w^sTUFKywE-oUBlX0d@aWmfWCqL>G>Xzbv}*b zF|_je&T2L+?B2$wWA6RQ7n$NQES&u;kLI(;v+r+!qG&WnY1Sd`gjiD%NQ)pi_ii6% zojchg(AC`2X!Z~Inv4ah=l<>*g@Np}GtG%D^!V7M57(i2o>AkZF!t@S25Yft8bkq* zu+m_fVJD6ux);ORiCK@Awbyl`sLtn^!*Zz0t&eZ<5ZR{^E&A-s0=40EsBN@_Q8}p9 z+Mw?vq5-9?`}GKboMrkTS=#xfeLKteqwiU!W*!={8oD8PfjM-d40t_kr+I=(S+aWj zU!1iFO&o$OY`g;GPjRj~*H}!NZg^a*-v&T~M5)tCdgn;6hiA#pEcTaMR*-3K#Klg3 zGKq5ro{DHMo|5wjUGla{#lS_t^Rg8u!K}I7UE=bQ+hD>>r40SybenC3ekd`0C1>ny zCp)uiL=SULttuJ~UDuca+bZs`u|4wolE~uQ)5jnr5iqM9ErB3cd(%b*y%ja^E~Gt{r`!-Wn4+DtC>1V_3>wUkop;8JXiQRyR@F9R zHQhaqqhLY~@Lhe|If#V{^Pr3oPRZXfENtJ*lnd9rq!{@JhO_}RTUJrkH>AV#X^?=Y zau7MI8g?n=U8Om8XpM*ZPyg2A0LIHR$U~&V2ArTFV#fg1Mg5}k5j~eLswCuesSu&k zXTSr{O^Z{iF_1Zbg5ie$v7w&ngdYve{bBlIKLBaPOskwjM1WG)tp1y};pCJJDl{yY zjJ8$c87RrgGJ}v@6^bB5jw!4qij2^sXs2&fs3`n2-)opN?m2xTqTir&<5}RrWm07| zA)t58%jS1cXQF!iSxJ(MqfA)(-l=VWYrDPV21lcJVZQfJbvoYufJ*(8in!J8w6N9g z8!ImBl?SVWXR+ie64rW5zQ;lXDNY6CYPwl(qZ+;A%}&C!QYJw0W7O{=mA6JQqJb;Z za)a);;pD}tu9&=Tc6e-qV6Q^+ivF%kPD4fdD#l31VSCG>;6S18u?fNP0AxdVs%MBR zQ~XkNb~Yjj=k-0}R&01gX;e68sB5aYG<(o5RH`2B#Tx|*zYXcv7#Tif_^}d?dL5m9 z?~G1Kc^2q++CdDkMT(K~TD+_n7fZX}g9skd!q<;WE?4N&1)D=;S!bG2 zz@owKYsvCh0P5&j;4CdAF0w%vq=g@YGR0!L9snGMUhr5IodwZ14w~G<8ZYs8KP2rh zY2^S&OZFM(Ro?c+*D8j=Z5D{m#q^8q{L{q~o8ykbL~W5?eC(N|a~XxBJpJmjN7UBe z!+cNAw075t$0v;HUDha9EywMX`Fr@L@u@8_1o;`RmnqzMegc|A!QzKowS+N-^&`6H z2da`#-KzuCaFiT-zXPPnhg->l&M{=+<5qjp3`YRH=|nVuaCsgjHo2c$cyA_mH!Jr< z)zG=}Ysblq8I`Xu;H9kd_$>AsX17V6om(HzQ;2W4Y{Uw88{~vbTjys)YBvBqNnIyTt=m>>_8{@ zt?j34BBsrGzK^Qy_cy#vHp;iQ-(?VlPK^p*o#sHs$M0>umbrZ|aa1am) zOZ@jKQc-R8$}i5RN5nI&cWhRF+6QG4YBNuHff08e3EhKT zKgR+>NVL%Eyvn%c37sIW$dYP|n}kZ)FX<$_AHEk36ad!13HHi*{m*_>S2-dMwRQc> za7PCi-dny+W!xFra&yQ?vo= z6x^mOy#|tPdHC6gtEN%YZZNUl(!?UMqS+pz+WA%rtMAe6X6)mzEGOHGVs6+ktnr)q 
zVjTGcH(APOZgLfzq6!g<=`*&BhKZgNW^XIa^m4QmBBnbxj*VSqW!h;(ozPvL#Q_Rr zcckjX^@%K+EqEM9dO2^Yk|>`OvvnJ2*Jm2sAR`0L4|Y5}MP6Kd+)Y3?)o16BV!7*q z%A0a0ebx+^iMBW5?(fszZL9OWd5uj4nltsV8_a5u6jJ+Ug9WCkl8MHkMUZ}l%i9e2cjJuqXSevUW|D=no&K7Sq|W`nac|wT_n0p(G<9VouzZXkZYOJzAhpIsJRcFo-h;D9w>;%bqf0 zG|h}FU;QHer@5_u`OEbwL$^m95hf`(C>UwRlQoVB!(_YNu?pJ@6|eGEj=(WYV+Y2k zTD_dDUssVd5ii$;5Mxexw{l#*(l;1{w0LZsFuO*Cf#PQcys1@7VUVKP_iPrUwh?B|*_9-ywK-kOD;rE$srtOB1S|Ba zYTEFq839L#ADPQ^ozKWCLXLPaH|@~6%I7fc3a!vr6a(GUS_h4S8) zSZ^-B5&7tBw}AKnu+($lPS+le_=Mt5s}Mc0X_}`azxDdieF%ubI_K+!(+zd}gw!6y z|1qmQalfHxE2jc9y!`?KfKmH@uDsW_0=_PZFJ~4po#M(v^Wug6%KD&g+uxHDuU`tY z2pgezIiA8kR?ywdYAoPV;2C9q-uJ=e-=`Vq!lYF7ZD*6~2@-|8FD41M6`As)|DfPyV)7m$vwM?TEjoe^LLQ2naE%#<*lR#XOm8 z$17rH&NdH)ni@g8k(q4<4V^JXpx#7Jihq~bC4Gh-pd`lRxVBG)ERFzP=@Ot39;

L8C{jM0Sb%^Q6h@lcrb%WD1PL2>G0+26I67*(C_T11HiAmG}S86Y0a; zMHXB2dRXaHObK@@*?pHtEIF;RzrlDZBmqC98Q%ih45*QjfVV5N8i=gq0SfESf4u~E zF#{eCR73+mta!kel9G}A`SF*xDL_X7qL}>o0SK_-fC(UJe}14Ff!bq{Qu5dXK5z<=@ZpZmgxlH4Pe33~Zlz z33ja|z+<`K0z_<=kKnG*?0G<8a|(4+4XhY@<>(1rtDRcdpJb0C$I=IJ?BOv~~d^_4E{HQ6P&n{sX;m+PJ99fPVVo-M&m=ux9M6G{O{psvy zg86z=`DOp^^zasTqHeri0ucZ|l>a`1m;Z+)QvkyX$H(fToZB3*StH?eZvS|8W!v&y7|n5|}9e zNS`S@na=1hB*K#ZXsGLRz?noexZLxg&(ThTP<_f3<{gDv#V$C&YJmG7_p0SYE)?WC z3MKsJwZtjngZrsPdf#t_9HHCDLjQ@1O5g!3CvWGXmodtA#)0}MmFN8y92!Y_t);S!A%XWo}5 z8nyd{c=qT?@As_t`@ub=mLh6~CMUm8em!C@&h9>d3)6cW_-3Ayf_q#Dm`RtbQe-7; z<6ee8MZle-$<<$I65JtuCWM=71UIK{=W96xivR0dtvkDOq(2*OQ{|?+7s79CE$^+D zruVY;AImeT!w+CIjce6aw_~UK?xBNYw6-yPO0QPjc`#3TJmtOZaTbM=2N$-^4pa~> z*UIVZx3^`Af=%rCaDFxuGTd4D!u!d2pv>SqA#$<|wWe{K_ET@+E#Y+J$mMw+-mvtL zq(|33uu7*EiBMRk$7?*DuE&`58gg>5;;4V`gbD7&$Y?{ALvZ~YeKy1oySQc}`gN{JxdB_Jp*N~eg3bazWAE!{|p(%mgx(lK;1 zz|bAUZ4Hlf5Q6{9LFfnJa=4qUgx=W&9D@|U@7Ilt071-pkqK5|BtmL)U}?B z@aTrE|8|0(9De+NpBz7axzV#aKqO4r;40xMX_CE2?BXvWp7bF5vU2U;J9y>+#j+zp zISIqpmhjww4a}-|Whirg2)8%0045@2wg}o)T3^aE+$G32ChqwbZE*C2A^mKo<DA3 znvTxaEt$oKv9Xd@bjg$Vs@9gPpz%16Y0CMk>ltTLz8bI9BYg^Mt;I7kxGyYb`1grL zWiuOIl+TN2Kp!<6z#fJy_G7a)N+u@<9$F<)pwyY2jOT%)pKhQ7znC|ELvl9`Lt1_A zp6FlZnBJbFo_hVgr|#^VeLqt|)(eY6<)t>zdFF8EtdV%e#3}3D-_&@)9QWf;|KEe# zAeng2DC%0fs3~JaQOai{xAx?JKGOdcxFq&8dHHL99of%v7jv`}uKFjsy5ujdHz|E{ zktyrB^?;X<(rXI53^HDwG@gvO)=!_L$@l)V%s{u{U(4Kv-RpDDDe5}o&ogZ{b8B1E zlGP17zvLqSwWnhByMA&ez7)eBUB%NEYIz)}y-c!_$o-`F2mW~{Q<9a3e!=hLP@tb~vrwgn z)W(uVt{DB=*(q>r|28}=oTmx(1(O^VgfrfeqZL{d)t2sr375FL>lmu*4WnM(a}$#Lmd`VTziE!&Cpc_SYU^o)+10 zj-K1#8Mur1(c2;a?_;dkH}Ac-!V6*|t1%8b_HZXHu2Wqi<_WXmhe}$M*7DqGu!*S zIy-w#;zAZnns}^LHmcVCdS&+WfEf;Puf$XKwU*KajtOSiy`SGlv0Hp8R$1|wXe(-v z0^F(fBh?KM?nC$_v#&R^*ePtxwrexI_zO9=PExbHa*LGvYBw^u8LwyTJhh2y{60fv z1<%SShGb-@;ipXza3lQx@if?x3&GL&y_lgOjTVK1`p9(Z!GhgGv0+=hn2e0wM(nru z!C%S!$3H5{$Y|0FQkq40b(4~h*StykCxLo6^S8t5p7l-7$S|BnzNUI<+O$aX)THsm ztac0$Ya1J(GR(T1sjtoP3cCHPlwW0BNcH)V*<2BB2CmmzG!Lsgm3zA%g)SF<7;5VY 
zy*HAO{(WqFRNbU9Kda=qjp8AJuu<+aA>xOgl|Ygh4-hFh{sS z>_?!1_8ioW4eT7fwHFN^W|{pPTq4Q@PS-8_YxdL9*KLcFLv-T6YtLf;otrgK)%d3H zD zFYTv3ZezIDM);95HGevd{*t7m_w_dF3{!@mpTYcPT&2rVT@AgwaVP5GRnAJo9CSVN zgaF*#tH1AVWHmB>$Zc&o4iBmGXrUvO#k$AgSBCZ&)) zs;#x-NgD5U=iS&sC%kI?{3dd^adekjLni!(NKaOhKBhxBGcL9%NZhDY^r1oB8R-PW zZuF@fV8^RL0qn0_)>YUB+0Z(P0(~-m@@8%N1@WT%0A$C3K$}Ar)D&3H zs6`G>Sa4}H;DkZ&nhD>+O;AK3jq6f9;QR;rg(d7X;d5@q3k^?zl5W}7yxZIvbBI_5 z+qRVGcN(#5Gm8Chme!uDQ9Ht3YBR?(TgmG=F3)27XI=J8y*%E7@>yIO1M$n>HU2>9 zYXJ-NzznCX7dFB~=^y8w;FIk)6KoNciO?Lw)=sxl?M6MG^IOkbI2GwPvc|LPefo>qKj+E;eZ115g6wBL1SQFp72S2xT{we!a?i_TZ!3M`x*~H>thv$CIny z?yUzqB2+`-=47GQUTeoScOATrVv`YdV?lMg&9j3`1RMIyxb!CrSiPCnTmHlos4Z^T zF>L83k`;^vF)ODjACJHP8BQI9^yq@Y}$Gp@UbahQDD> z%@w(z*0-CEg~=cr7N?XdBoF1Y1RMstd_I(Foa#aomweXK>P+a1jgd05slQu94PTWkiQ9 zF#)u`ZdZWie$=%8t0{dMU};M(HtAD;C20FK9~MVQ6fgJeS!M246xd5 z8B@I2j$;GbR^Pra(k1ylo9$7mb%(oPKp{R z!cWGB-_}YBBk77d`CoTt*KhogEOmcUiPH=%>e|e^B!5~n1iWxSxXu){SgNg3AJdXLw)@NyBS8$^m1Wf_!xS*h5cnE_B#gV%9%Wn z>AM(_@&vJi>_3tsN$~?RCv&jrI0^xU{`)a6jw>7XhCW=fD!Rq zXWyDFyzDx+rv5Hjc=fXm3+eRFI$CCWZ?~5MbD>MulGCJxi9YBU_lbmGMFYR(#H?92 zousg@u~S>axJbCAb{Xb5zrXK=s}JE^7bQ7~j`v^K{nG=1cd}JU2=Q7NG#5(Ei{GuS z&d@$ULiWHTD=V|#IYh!9qvOfAk2C-Q%u|36={|+nGq1ZVaXA2rN#r!IdJ-{xFRSx) zH=qT4$i%V0F5kk2+7O=pR1(08%T5e?p6Ppi6HVju%uNw=mITtx@k zH04?pl@3rd9sF$eicu-)?e>nP49YQbCerD<&8VdJ0a*R&HdkaYX?Ybn=0ql-Aw~}e zBmhM3qf!TIFt(0hPz9f#WRnP)?h$xP(h z3KwXAm%`b=g6n{Fpe)A&&$B>weEc+F5x0+wQQ`g-1c9InaJyB=6T_Kg4mJ9u)N(J) z!V(^m@i)B+yiaB2e~lGiG$o3!a#x2y@CnKjshxEluef#JZHZqIY)nDNW;o|+@V zIJu3k2KAg^pRDH8`mg3*Ij(-!r<d3;c#4v(fcpC259t^%fJ=&|7A~o%0o5SPF7*YbsU3DY1-uZSw4Tba2@ZM( z5z0;xu5V7$wjDA*cGrGZ=peBVuBO4R0-AVwrrDnR;^0>sQ~C_cY%}o+j#Z{4H5F!> zQVGmX>d3ZIcZRQOwwme+yN^ch7J_4f*oM%qg=@hWVcYDV6Y#I+!R@ z#_+F(=P21qAps_=*B=nF>oq;Rc}y(dY$lt%FeZh+CAd~okSzTA-RhmHA19C7mcFI=P>ZA;rg#9ydK@~KOBa}P%1F?0?P zp1u@H*Lr^_&K5Y)ZT>K;JG)!7I13jE!)E<+1e<4JkXpcjlJL+z5ug;z8r^uzYN|? 
zv}v?mQBzY}NmfdtA#E}^pQ9EzxAF$Vh{f(Lo6f?;?lvNxmt*?QU_ttTVDYZwh9)k~ z6d+kk-mRwsDA({0N|Huh;Z#ltjaGrlN)UwKe!N74A6m(gbRa0d{kV0lzaI>xNxsTK zHL2NpE$K^~vXo2kOcV$cJJ(}kRv6xYhEyaB&TJ)+S02N_tXS+PB1~%cVq`X?LE2ID zF^G4>MF#BzcNQ091x4h2qdTM+SwWFK)uZu?lt%mplaqr;fubT;D~||+5AB6RP;R6@NC7g!cvt1q@E=ERmuYqDP#={K9D-};D92|IoTVZ_v7y%Q`Js7~hSU7eP_ zpSyR}5eSGRDGNC6KZW2p11(p?^)b_7ug zJIjOc&yyZygoU+1jtwOsk9X)@%Q0&YMWyUvCuL-l@nmnb(Vb*ATf+-usvN-NJuRKP zVJ_bUM>7M_;lG9KK$O7rH=g_6Y0OMbOMl(Z(4EKKdqoz|`5^X@lK#v9JFNt2A z!XcUk!8Y6dg0=QAU&VAG%Rr#lK63^f#Hno*P%w@K>{(Y+y_w6ev`jnzq07tMEv8Rs z!8qWfD8P>XvNS(g+1({-3LSF@0EycHX}bpCgor_mg|77k-?^W2er=?3Bk#O!R($97 zt!lOJUx4sGed=9DV#4cCKO}#M#{VIp2IfF>3^f1_ktRa`>Qd1_eu&~68RZhl!}o4- z8beV~eRxn3@vvX^^}=I)E~B=G7sus8mMYCgqwhJ#UjPaD3ainHFX#z{KGVp|A(QX` zwF)qESwU-EE`B@&<96jVHjruqZ>>6u0657RCI*;ihyvs?y$@DqYuzJ2qqyfHMB^bx z^Ds4sPU%Q)mH{E57igJ2(f$wU99Ye?{~DI$BX$JCt}6@#9UJRJ-EiH~8lMa=pjkQs zn&nyz4i{MR72LskHOrBo``hvRAZg{n~z+I~OR=cKtlNh5#1)WZ9u1PuRqksCRt4Ij%uUzj>LoZq?Lfw`2coDl{ z`pVXxZa7})CL(}^`fuG*3$|@PBqW#Xx`lEL8j3{*z>RpgyRwKP#PBXL(Y%=niTu(c)Ce<*?@ZuK$DH&vgR>XKynyN`RNd! 
zyv>DExLMBaOVPGGe*Z!ubgJP*7Cv>P7jC<8EA?V4>iwBlcTZ2bBb;ll&JoHU&u;J& z$^;ZmLel=e(E4vwB=*BvFnrU<{g}^rp2|vmmn2p2FP5Vv{X~bSmY!r_6Dpy4RGUEq zMgSl_GeiUs2_lnmm<+PUZWe`MmQn)s)i|fo?^7YkR-$LR5D9%xt4JvCz{)A=#Z6J7 z`|U^V&f;eBF{tTQj-cPVTsp(>Wu@n_>VR<5_7x=e$?Sz+1_A_%I(oKO>2y@H>1Yc* zGOrf|x)T<_4DQVPhl>n|D_|ePfNBw0qL#XcCeRG<^j=wil1%cp zCDxq(p(ukqoPrcTs}+>kERC2vb~CK)Xh6k!^q6UK!dH=wo2%SX zoQ3%s;1#qh{=QzPKS2J$kDX+QK6s+({{{uHA0kX+H{rFcki4%>xo1aSfXv&xsOc|y z@=ff9cRJN_F==9D`3uHV%Jzsl6()r%lyvukHC`Hp8@&5Asgb#!R6Hh)M1&AS&0n~U zVphWij>f6d930D@cU9F!;mSPQ`R|7RARBBU;3 zANOt_mroN7KGy&U8hUCDVo&q2rUY!~0>hlLI@TU*OfNPAc6qotD?s<*0vbR)MWRVONgtegt2jBPgkqf(F9H%}PODfk5ukjG4WKAb!2VYvWy>rgE3rBwyF7PTk58>~+-msEx zx_);uU7z04*RtBost8$rk8ENc^rEF#?~qbej33^^9?hh7p2e7IlLx72ZRe zP!E@JCXoyr{@_XzMT5aaucezIk6iDWdzXG*=3<8ggoGb{{|&0ru}8;brQh^ImxzD% zK_aL5X**=G4|}FTjDJe~=w8iJ>)OtQ34e{<$2R_w%~cKSld`-g^=p+%#Tf|Rp1F?W z&f!U2vj)vaD(SOD2CmIzHP36_C*5nH4@}Cfce%>T-P#kp#K06vMqN?pBJuO&Z5I3K zdZuCneRFh4N9bxQlVL>(J+X({cSR0LARhepr9~$tLfNvS#W>a+-QSM09ktappmN(e zsW&0CPP_E*T6j>*)_shMoK{Nx#%7WF^}?=V{hLRW zt*dwGjd$TitaCG>r8jy8S2Me}?bIu771n51G`JCaO;zs4yA(}FsVW;OLb3MQ-{}bj zAAd`+e@2Uf?u&;mJt$}@J(wXe_<~th;^5$_zYvw8^gGf^cMi;T$_+vDLVnnrQ3{VgQi1B-4LAHcS}#JA+8DM zxpK+~>v9&A{YDlQn@&Pz0b_kMOi?Pi@ni-jpJy5*Ds%hjv5vSh#*4No~Q9a{1xTW#tH)X=5H6(`EJ4(XH^S=HtY3Es@>$+j`3>ox`6J-!>Z8oxAI* z2>M*AO!ExzjHO^6mwdLDPbP?|kNFg#7^d){>A(nVhtD*a{(*PNY->uP1}U%NA8 zt9hfO^Xi(Ex#US)sh4Bkxfa_~sg>H;yM`f?@yfnxF^XUI=8&}Xg`y^2{l~kLdtKJN zx6T!p+2}eVS-5P*-s)uaTf{>Y8?nhm&6tEcd%V6rv7HRXg)oYxKvQ?h2-MG-6IR#O z$p>Uv60Oy_V9{63QqJ@Ss@EMeT264!mGiTHLKE|m3wKk83KUzoc2o6U1vwV-Oqh}% z)d(X~sp_j)Ql-8MH>1+qXoajz-XFv|%pxxKbm*2&gFvK%Xa6+wO zB7Wm@Y}LsaJUU zC}7B`UL^Lc!&LaLXlCt(nDej|2~(u{@ev>N_U7u+;;a;%?PPk-9iMT(3F=-C>u}do zQ)Ac;yBegiPe;TAJe4TbM*S&IRf!cVQXJcmtd2CD_S%d$(p_WURCcF{p#J7874TS1 zkU*R0OA>fd^(lpmSA*E9!R_YW*RN{tX1m&IkFv!`j1`9Zr4=kFd%@UyDpeWNMz^D` zhhNMFeMrI3>8sHZ6Ze;Uhb>}^O1*IvASsb=216WDb}w0T75Y3s?yy$7VKLj#^?P2O z_Rt7JvCQ%=>$Fpt>lv`PItPTpru-hf8<-D}Rl9oL9le|FB*;_h;|t&5(C<7T$cy}%^q%q>T4T|{H&HZ8 
zYqNA2rc(o2Iu^2t2`PTq$yMFNkt^3XwpU81paSP-Pr{yQqmLhmO+gpwdqT{i&#LpL zlj-5Nrc$jDVlA`JFTQ^-;B)=F?nV;FrWc;VGWyP=PGW`Ngi(c=-6<&j5OH6GcWiQL zthQc`Lnvgmt-P$Sebv?dfEl(RTHohiyO(rot?Io?`VpFJ0bj1x4iZ|gTKK!-|Gi#i z`d&NWY(<@n^oVCVU9iHs-Ku8S@*9|-Iiu{b>JI~hY7xFjL%Y2V10$yOu$&<$(|3rd zp4MlDZb#FD%+aPkNLlx0gni9B4&&ZQ|uy`ELYg@(vSZu)m*zoQhKNPRzJqJu)7vY=cOWzpFu zM#wA@g@hVgoeeJgmV6#8ha+`R4%n_kVilE}GH@8&x60?pfYp8uEVjcQ24Sv3&#zAf zpOG*DYo1R|pT=A5JqM8L>Ex2Q2qiSEzYZ>4xwfg!d{B=0zHclLkiOEgi9E|Bel^Pd zl*OIvJ;|WVyVs%8gm?-i#Z*Cb&xmD+}z51VSw%|3+UI7;ZoDV}WGOEzJd|aV~i_0fzMjK0iPUFrc zK#PQqf!*#G5b`U(f>J@~I%NWnRDjz3Y?p*7Eri`pQbv0|)PwMeCiBK80zw8NryU43 zv7pDXKxZgb(8!_vt9S`e{Fln*!lT3!`SdAp1ri|tAPf(W^x-H+8$;&9FmpDPFx<|) z#$QOBd&!6WxL6S}l_VlvmsLRUsaFU7-Hvlc&+IO9%58S>b!3%t-1ItN-{+h)PL}_i zCa^>Q+6N%~Cj0E|gCrg5NJMm%4FB{fK}0}7-Lqnbhk}cOkV1}cMtQ-|1(`+Xvb^aW zr~%2j?uOD(J76g?A(wumSh&-I7atjH%%t=!dnZNW%g;%Ojwn9k>&Xc$wi<`Eq>5iu zsMp9ys*}UCj)J^zH})FQad0@pP5l+}7PWD8`RQ05z44K8w4|b^&+0fZNfu3Au6_et z-~3l3RMTvZ)Ta1Rj?~|y)xo1qmk`In+PcR6EbSsgJ@)5U%sMjghYmS@!2?*XYdcw{bM6OZwn zMAMv>O|wFiST{5{^trZMZs=zC~kop#@v_S*tsRf4+1L9>Dm9Nq60*&u z7rqt?%xd(5dM&L|vMLRC8NuevHssP~(}HFzUT+E!(1kK}*$!G-+siiTvnZ7kPcaxZ z!*!G%s64&79GD#m8s~tf3%HU4rJQG;QqdKU%<=~1$elwMd1po6%`CAK5r^(O1THwS zsHnKgV-luBH91`BD>XT!)qBXNRCYtEVI+pAn1h=p20|C$cEl{asrMKUc>#=kJbd@S zDRZS_wvm^UPq7;wO{_?95ZbpgO~+yq%Qs#tXD`+YnjFAQnE0I)dNoss!?I#^G;M|{ z3Z2P0=ypo5id7X=wMGg6KI+@IZ<5pvLu#ISPn2TjB$rdTgHs*JIZ+=aJas3bR=lZ} z_<<#N6j{61d)u6`w{0{w#yeAWM!zhM)8sG%x2f~?^X_o3?XWROHTM(JwR}^hf4Y0v znBShq5mFw<=^Sr)o6P1}C7c!SS#?z317mLf>2&eKfF%71+*PqsVxC%SEI{{9L;Nf9%$i5S)SX=3Ut-wY*^xFkhuV)%Onj*eL=$jsq+XlcR&^>w zBq@IL?dVWw@%lkl4mHVt3U|y z8TSwkmwIYFzac9wj<^;ErerDx&T>yv8|VkMTa&V?F09MWJ(jTY{y1O`f&I7`oDr@Y-p`OC zMmVjs6myYGsf-ntuTz_+UXjP5(&1eoPt|_bN5TvL9&>M-ets;r@|fNTX-a~HD&fO~ zz1h@`QndyPldQvNgNDxL%YY}#qp;aP+52uN7?sl0!m}MEV4*CW2x0u{I`JA+Nz~Bu zCzbvvJ`kW7)od~qJR1vUn%t{fM;5nHE;p>Zy6ia)Of$1+pSz7N{h8K{kuf|r5=gLN zZj;?WF6%8R+iR}kw#7FyG>8~(ip*rE|H3k;e1FrB(Z=qJB|M}PwHE$rlAL##MolRv 
zyHTofcb4hqdf+8466ssC5^Z_Zn7a~PVmHuPJ)z|eTHr{iXrUab9LR*{a}H9)BYT9E z8l0$@P5j^5hAt1NR0RmmMi>wV=)t%N8mpX9MJe`cpMNU2CBVxSY#XAHkfq zwE}e%iTl$Sghgzyk=Z9|J`QO2vnD**wL!bLw^-uR^UBN^usMI=IZ14Cn6@JvOf@}x zoHGvyXLAbMYGvS@En~(sk2MP8QmJZ$QMx)%NkvD|MfWtu4O`n}GyzxLs!oRFS{#AI z(oEy~?0(UZsq4|;VwcGg7Q(m;|9n((^__;dyai`}U@<5L{Nl+`L za#?M%F1^G$0-l)Z6Ep9Fi}c}lXY?G)$Kto}#06LouvCKylfKV}&kb!?Yv)!fAX|s# zcdIG+-GhmKK)Q2T;l-A*Y^u`rj-b7XdpF$Y^ROXR??HZTmFJ*i;&cM`NT6k=uXc*a zXEoy&+|6t;U$uLngiBDyNN3%tuu9U;=h$vrocRD$Ydb-45FRNsU2aqOq5$IfwvHFm zuh{LM#T^^)c>7wY3H~$;&$i7&vvL$PKc#%9oaWc&DpM6A+%#Z^#0)9;(D?~9r>qaT z6)9Hz^}V~8ahHwgZ|6haFn|2tSdk(y%J&=#VQ8**?d8G}-{(CP?Uo!Ov7%sgi)TzE zamV&_*D&_`*AeUr7b5|US$e^@L0AOh_TPkB1L_`<#3gY#cJ|B&js%cKYmi&?hS||= zab6GOXN==-+BiZ{^ukwETMN+L5SNNW(Td&_XIlXJkyhL7dK){p*!uLuw}Mm6&hG;iwogU^`j*&v9u&F8 zZTyb0=EK?5vTxSLc(b#Yw>=PjVJ*sF9VlSVG&r~C)z%~m&)3|wGFo7a%~{;))LTlb z?nEGIcK<7Jw&`rZA*@+@8{d;LN=iV%)O<_8Ua(N2N>4=UySQZtbV}6eu|6acFuom{ ze`_&chHXYhCZP6;Q58KUPNGZPd%_|nCN=yi8$?Mz-cQXO`?;#`#ViEZ?wKkI;^W`gg0;XckKIrvV`EKR! zZmAT~k~jvnw_>m6l&7k@;H41`EMTTqogEG{Z~n(EMDmraKiDQ=*Y{hEdHwpedKN4~ z977i!10$B@URv{YeRa@IOQG=D;s%Wm(B`8v?R|ZHKB_F&*DbBgd@;zdh8RjqUi-Gq zvaJwu3mHb(cA7^IIEGVD4%hB=JG7%B`TN1#4ozH2r^n{cy>Ld{>I#eCo;oE3vIYi(*RE0LX8=QhR8i?wHNd~=ylT-@nPuJrC zEzJqn%8)3~GoZv>`junV-EvE|*5ZW0pyVWlhgD#+v(~zPlrU5zvGhbW5Gzs8zC)|t zp}g)2BWW)UZ|_vN&L))FSZR_Zbww zbG1!HevSz6J?2!X>lz@~w)qq19 zhBy#_VZ?WlFi{?C{qp7~D*MwmIyx3I_mH!dse@>A9E@0@l=r2k=`VzBG3kt!1RO8f zmX`@0vBLSZ<&>0S&G5*-?Mo{m2ZRQYMAmLTD2vl=t#1E%5On6%mUlz{l-&&V>w4T{ z6&H$MHg}X+EbRS!IQGNodI^`YSUH{f#mY`b^|n6*Z+r$%`mMv%tUA8?v#U9u46#GP zZDo<3BKhtcX4%%{dG0#RV-TWKXvRCDBM*|wrYN}$lCUe>b*5@qP&zzEJWp)eDzPU= zgmQmfj5AAD#ik_aTTiqL#WQ@@e%ZR`C5Y|sX)Dw=F6T_>hY+Bw+s(f_yv1~_X?gjG z*2v%!Cgo!W>ZyA}7>uMCt(>dLLp=>Q#<~d>ok_->CBH(0eEg$uP*_ryTZ6GtF|9xM zpK;lah}ce8sW*5&e*5?z?rpgs^?zKfwpIX%i@&4b2kB%zQLKQ&YJ$L>!ya-M+6qt? 
zL!#c)wX2i*V!NkNsQ-8kgA_=IeeT^RuU${N2b4=Xql~R&BRT6^EizNx$IMq&FKWg1L4^OPd*#EHcd?)%!e<36fNtGy z-nDb2U>0$A#@Z98B`vlI#NqR|3dH7mKwPVd_ zAqyH?-)UNm>%(2G69w*9fp2Hk#ZMh~M@71NJcjsQ4h^izqvMx*OXCpucsL;>^b`pc@sfB1@f3XtLd;VX_E zXx~_eryyWDDi%(;|3DHBUuYjR7!Efk5^y`koWaRKaQE|V{8pTOnkxBWj@l5O`^-3@ zWExMp%z`BO_(QU|kaP28Gs$>g$XW7GLXjw4)rJ1(9&PA==A%Jva~;_;g411iBtO-M zgbGjdsFJtzHRN~4!9jt+yA^*qt_(L5K}13iOOWI~NX=Z=F?SuK!rQ^J=c6`m^mD{ui7g-Q*OTK!3{{&i!02TAidZGuph zWk+U4ep|63juz_CxaDf?mZZgY^5VJ0mCsZbi9)rvVodMA4^Nk>VzN4C`kfSCm^@_!jFzu9jZI%rulh^aVFMhtUgTX8RYSU|&x{Y)2 z6$*E0?kk3LRn~`9UCfW1xIZCHtL2Z#})ZV*Jc;>2a{sCk86F zpqmgFc#nsdcY1w#Ybi_pB0OBlz&N?sjT2M{mH{RWp5q6rssK^X<;6dB1tj@-XV0oD zrG`nAiKEw0i}@L zYhvSyPQ#p1@i?O!0w7m=mF-a=5$sJ2@8)#Q1#D&vpqyOCd1L`+hoPTRQ2wv463 zU{uB7(8;K{=6TT6eBP+c#wPj7h~?Fn!4MZKjlDN90vkIkPSG22$rT3FBz;VSERu%} zhkHUMjC19KWDgU0a*xg%P79nMH|h4??Dm!#r!o(B6R$e>((lOhuhV|IsWmmjILrGu z=}mG3%u~(odK&YZt-lyRz%W-9K_lUD?l`I2~ zfFh#e_>LO|1k_-cIA#skc&&8c=3tX61TN*+=i_{AIjeFg?OiUFne$C$7`U(-r!JT?{cw^|xlV*UAbi-5aYv%?5VjvR`N| zN3RVGg5)Rn7|g%9$DOZSwF%h^xP^D7btP9dw$JGZSK zRt!n%uNgNW>Me3jI0`l_9}PE2p42#b`4SrwB~HW~9iwjp?a3oN1>nv!v5n)yrvD(4N|V5ikz4ibvo15UX_bT1do;H z$HA;T82_%tvyPa#^BU{m3yUoLl*%08yB-0pc;U2e-cdz8=Q?+^7{chz0~w_`f1ip< z^t2K(mH^YV+VmgQ@)-6*#-D7*HVYgT&-1#3#YG@IBO=z&I+Hs#FM^JEB_vdM-R@l@ zsN+rN&jc^1dTD?*3I}6fX=R_9t1L07Sigg}n%FWV^2Xp>+_+PI6W_7fYUHBsmj{KtOz|%rZzCNiN3kx4OMzUix#qQqu*m&(FHk-}vONqm`{7 zed}nGk$(s6B3A2qi199Cbf}8DvXViF5oc|K`vdX zFX2&^^zCi*D(dR*N0PGl0N&y0$79l?)@_@mHkpouisvf`?$6a4c3)*=*u8W}YPWY< z3i0J6B^D&{mohbyXyLJEYdObA#^be2x1BOwK0;JQPp!e0B%?SuSwsm9*ReR{XK?B? 
z`jp_X{WyBhT3@}r%$b%EmXQtCH3$}%_g4CKlx^GOQl9N}ZrI7Wmo)h^CQDiGQ4%84 znJt>9L`>C(L=7J0~IJ#JNxkNR4f!~+uf;@-aNeOrN zT#8L=YkSe&chNbz!v4+6;Cm88@-> z-dubI7z+RpK5?_?$>eG)*;jTy5JLQ>14d}N;$u<{1;WDjzy!y;6pR-=n$2e1)GncF zITDgPsO&m^(i;3gK~n^6ish}eru89m$zi+ zWH#GwGGI{p9Ua^{Bc8X5UoKmxeDE<^vOlP_h1~5&&>X}^piu;g%g)^3dS4=W zK7Su9b!%`tkLK+HW4cc%uD5-2R4=}m{H-S<)n)s}`sU%YC4U&QA{c1I4)62Bro7Ye zUq0Z-O%?YPkm$T>Phjg%5cm0{)y4VHqpBtB_Es%Y`*q+k>IQ1s@zD_edF#-P)}7pS zAKK0SplN&m6xX9mpD1$i(B2Lj@y!TvPaeE<+|W4N2Oj4+1rN@j^4Zo(VAZ?-bQ6}& zC^0vpf4XMySViSR0p1EgLR|oR#SQN@foc#n8QLviBqSBIt0-`kF!kyPR(-%C(X}un zj&Qc8A$q%(j>8z6n(CjzqPBKDbCUcj5UP@wV`4Zkb5i4VR3-hWL|^U$DZqcyE&8cF z1ihdV!{5tYA#Cdd-SJykp%#mm=xX+P#s?!Nnx!9k->wj-sA`2l8pt#o-jS&%Rn&Zv zCevB}^jWjnmf*`ehxFdDNTT{Td`#tRuyHZxP$34qOLDZ7=B6|LaFIM>4}KAB|MvGr zF}6u$l`Wen&(LmrhsqWr6ylkEih~$?if=+~&g0L#zZy61$@v$fF(s7|D7uMzoO@>E z8!+PtWFg%PA8H>Ces+D(j(C0*pVgtt6mx#W$s-;+K<&Ov{yCO@Fq&N2BhhDTu*0kCl zmfNI+7MweM_#%Pq7vP(Vt5kk&r_vxTQ*!OT5FRat{kEPF|7R5mN!0vb8#tw>32=Y+7XsRbs|-ft~LY z2W@h*6+mT7$GP9dFO++he)kvC!?ytJ`vCj**p8CC!pqKvOjJ4LAs!7%QwC7|=@xLH z3SjzU{z6q29guHlXkuzrC!U}W%+ocu9Zh;SufBP3OOx(>ztlD8ohZ`n$j}yItCkKb z3QBG*=`5OcRY|2 zsv_(1`$`z~j_HO{3FAC6*-&|<%|1$Kcl7@4vx$e4;;t_zM3}|BC1*h8Ul4>GVT+MZ z6X#n3NLE$|-X%#-*B){4`t91O%ACq?#Nzqm8G&x?6qq#eLGkm;&L#x)Sqx!kSX40D zDGfeZ*bN$v(LmivcX!JEyTTx9rGr^k(EVnT2@;47AtjFODc;>fCFTe&2*9qT{l@nC z1Es_ID48;!qiCPak_@|kHQWT%W(PL!7m}!Bs_7t(J&wRp!jJi;4RKbADtIGVLXA(nK+vNnQ*7HFU z{YX9xq5}8tZtlS1`uY+4)nz}||I6T*t~zE&n5Kn1wL3$3PW<$XkCNCVYj=MVvoPn= zuvzJmt#n@Z@EDUA1gg73dyNlql~#6zb$(a!O<{fDCoSEC(Fn7srM#~ zsjrN#DEKY$BMk*hh+^M#r=>7B%J&V)uOeNoU*3m2@-y36Szi0erS$X3#O@Ye^8h%! 
zmyMqXEA=j+Ed1!DL+-rznl}7c^&;c5k5GZ|cS_`6eb0Zw1C;1CcZgFR`aZA5dQWNQgqKWDlSB&oU^PM6~Oj&bhR8exl+hNzpvT#YN$@Z+-u)K+nBqr zQq-!CJZSr~8v*mqY&9-6HumA7XBt2%*)DxV9SIqvZ))+fKA9&u3(A9IN}}hTnPR@5 zLFg`So4WehyTUU`4M)Tg&Dd$@n12NXQ3NJ#JUN4Tm7ndehv549!JCI=`a8_F=((Kh6C7%66x`MmCY@QAM=za#40teSL`^H0zN^wirQ+U z_TwXvPr^iwnoUr0Vynh4GP4cEs5>Xm3DEtg z;=GLJxV#=8f`@zuvRw1d^WOCyvCh(_F=vgn7X!Bxr$V34k`_Y&@alH46&V!qqTO(r zA*}~1T?aziI1Y*eoG zbMFE-%(akK|Ey#%wVvo}Cqv(`;M($Mi)xt(Nf+8Rtw2vZI@!Ik7BPXe**P6$?Hjk; zQiH0l%bi))JU4oPwUn}_$Ym1`s>m$wPB1vswS9Cg`1mx_Wc>~+;kX&O>~7%b*RO7; zdroKj74VKAydUw;E+Dum|Ew1r38^FqJt4V8^o1u^r^GZZ*eg4-&t>S?M3BVqM9{@g zpb^_MZnldUlr^cV-+voF9UP(oF@e00vkocGho_(k@YHBon#Fr@(dZZ^j;9TVvt%gxF<2ydAqhmNRpMNyIl?DrWbI9Li1r#feTnMFkI*SS!M^pBVvf`|an5GREYfFwc6l3|L+toasr>+sAD`Lgjo zNetT2DWn$8Cs9CB#Q{zTJ`J1il2O#F4GJXGH}h;T5Be|P!#`}6znYsHLFy`M5n|25 zJh#io)m6rA;AmOtQOt9liT+dseiVEwku~Rt($77v5#_UJW4|Lvpjyo};UJ@vw1PH; z-I-pVtTd0d7L)a;NPqgf;G*#Ol~n6T0~w*<{EK8&*4X(n$w=wcWEl9--%8p*!-9Y0 zY40>T`;nR9^fA9l@@XQ2X8tgW064z(YybWA{ry3j1QL#B#fhC6^mV+a?RFy;haHG=hyZgM(15^&IE z1m0Rph)Y1v!gJzr6o`&LIy_6D?3@?ftijhy3|nm*RL1BOjFcoUHF+1R>I^ z6>|-ELu}@1pIP*+FGofsGH^zFGcMy`+XF;-Zf^dMw#cLsoks*Mn-AUD4`Y)PB2(+` zZ$>!psj5)2(Gj`sszM$ZJWgk_-#XURX>yPo06RJr13&h&WS0{@QPFlquSHDJb}Ex< zl(w+*b8*cFBLTSJ@bhM#{r~@W(R8!>93<}&PcZUVR9bomktiQK?kNwc58?x&bLKU6xua z?gkz{sBvC7N_bu2E&G383^LJkxB6pogg<=EdssfH-!`_o&T8Fx#A$l*f??i{0AuYA zF8N{s?&5B6oqlft?h0Dx={EiECG=lXFwB+s&voZj<2RssSCo`6IW=VujT@Q}=jy+| zLD2v6hcGMkU(fh||47H_dug*5U;f|L?G|T9Q72}x^Fy~>s?C(w;T8$&)5!eQ(;xVI zHFq>D8uIxkMat;S@=l23%~GF~d}whRpwK&P=3Io26Z6_p1PXa>2!I9-7;Zznbn0%B$2@VobJdMKn*?H zMM1`J-TTghXC{|gkfX2WLkj0FLLg{Foyeq3VXz3|hXm{s3R~0#xjhpx;DL^Jm^C{5 z{uO6r7epCN%)%(dLJddPisM|WDs)`D7n!ikUiMJ$%mzm&xu$0cos#hcfnDWWIfCm) z18*C`A73@@8H0Eauv(|svzM&ZL2__;+2}&_5M%yGMO{7O^A?e;rdr7P&w^N<^~kM< zkMJidUizlJ=dVVjJ?g>aJ|g&7K-N~XGETtbb6-N7S-eR4Ws3Tno`fHbFEe|Ol)zT# z+?)HPpZBWfVP^fRyu!B;w3oR%Q`b-_8lK>=!cHlUyNTo^tHW9AGHd0g`kQX+!`Xzd zJt436#8)a1*jzHZS4bq-y3MJL6Ta=*YoYecGGx)ZEpu%OdDImekKnw{5;fWN8BOHG 
zJ6n(Qk-I}Fmru3Nj5~OOn=U?f92k~ik1IyCX5@AZ-;u_V2-TQ9Yu8m5RCd)?GO461diqkJxHs4lo24xB49K%7YnsH@*`6ldb zv_$6(-Ot@v;l-8@RACBI*adOX&tt$^oM$Db93kvy+^w*@xI~rA`$brk970fYVq&EO z@3O~;SMx40)BgycnW~rPHVBD3DsYPo(+@b_6#V?ypsU@rQ~wBv>`B(tP#kQeH#&_j z?UA$+elN7N?gJS36DTSOCphe6w5&1C>@VD%-64QmFk>u8U@li{%EJX)(EE7LNWI^Z zQ`&047MK4SxLCxvub>%Bf#tZrI{0;ioecp9trT56Lm#?OFnXp8(pN?yz4QJ<(p|J! z&V3s-_Ek~sx_0o2qo>j-)QhEa)&8RMCpFa_d5(seu*AqB)`q>iyF}jBSgMhNyji@kX)%A)YE9I=K zPN5-&SfOVY`{-vibp61cAgKaA#|S^=0KeB4t^Q7eZaghNm-oqcU3)Sq&Gth%2lflEjxo|h3>A;|6&DN|&iZ*k=K<$f6^iW661pT5x>z;PSkVbGwe8McOgS&|rM^A)@s%bcQluI1^t!db z=HSx4G_7Aif7#iRt>ZQIOzODR6%MQrb<%0kLJenBmq0mnZC5rW7K+UDBkPurs~A3r zI5>Gn4P5~rcIaYh*2DID&|f$_WelUA$htw$Jf-FrUzeFx8%F+aflx3MN+!DE_#h_z z+0ISx)y@8Rw%hSh7NzmIjd_#99^q1o$T+V}u z;)O8Ki~V4`kX2^g%~UAapTSLr6|4Bk(wxgW$N(h%becR!V7wY}`d{B0G6DKwE!4g-7x%b?#!nZkzBijQn?@PVR-Siq%1h*TD$_Tus( zh(~J9eGkcMc`= zpF9^gNHUmGc11VW1wFjoqBSf$758rBhzwhvXQ7J#e}lkiM52?S4JeM`Z_n*#O&&bV z3Y;PgWo4nk5K6HOK;%0LLKaFJJgjDxSj@T`RyRNq5&hm_efSdvp5UP6dnm>0^i~`$ zX>SP@%=9wf0DLqKuQ6`KyX#$y^TjLS@HPUA-A6)aW;Z$eFazN3+=HQsh?x289TZJg z9~p#C!OwT-v^iGh%=cV()xwOWk>lkB3i$2j0$-|^$czc6*{{sVf#JUy0%lA=GJjY1!ElMK-`|n~ zlgKIM`blphI5We)SLKAi*E68|&Kd8isX|wcz0XvoXQ^M%+nzUs+V6fq`OvJ=Mj0Wj z1j7KwM0-7SCJA!Np`}fi_fv4bQF{b9d+>GIegW44l_tzdCY~qJd!gQ45%DjPlbT2F zE^_=2>93N5@|Jaw&8$1m`=Z#!7c&M!!Vwr05f&~DovufA_18ilBl~2EV_rkISsmns zTlIetS_^ll$HMt?4?NG<@Jrx~lQUCQAM&mFPKn7{TRgio%_`vh5l2yIKJC-iud-wFU2`yL=Tmuc(#Q+4e}HSfHv>&Z z5uf4~njCCp?>GCA_&sp!qf%rTUEC%Hl%0DCxH#QlWbfb;qrkJB4294h0{1(?9sFo! 
z-{@GE+k{r!4@uoQ=VW$QXG{ldq%nWiLglJ{#U*dsCK&Ww8rB{R#4t`tM+pAnH(HONnYk1CUv9e z{G!6Nulo&PNFSGdN()9+_cyf}HG#oFgFJ!plJ|prbI|WPPKp!C2GodGvfSQz{d-Nw zN~fEGHTmr|x$hkYd6?$W=D$2!)tvCQnS0KwZ;&;>Rr4C*gWPfv6!u9@{Etr(pq_kV zIUzNkqQC`$LjexnEiAXQs}s#+3Jjq>euPaFoikT20;CwJo(H6CxS;y= z4(YMb)iv5rP0FF;tb}wrV+sv}75MIc3%t6Jwo%xo)!2j$Su|cxLT}XdunlWzVQMT` zVfS2etoILYLmZz0&$4Ya5Q_dHbGOjk+wExSM6XSz%#bQcX+e5aOF&t`!4hTp#7+jz zAEEavzv{vi!##gVy-_%zc+VJkzr85xWwT0Johqm#FqYG8tQd4qyDREuQzP2eg095w zuzIp|whR>$(?F}JV~QCCm;UGsS90d(AXSF+E=#d7J>r>8Eko6YgYFjei| zrS4gVTWMNq>d@@$Ed5yqc=e1*-~s_SPrdjX&gB3WmFi0_H>duliA^v;OPkCYrW_MJ z-ikUJ8vdgLn__ zIu}^?RtNHKV@6p`^)Es|DOIdzu$h|wdE!O~H_2&NAVS;&bf(c1TgHIz9wcj~5f(Zs z2hz1n6=@ZTyUqms+gg_F8Ws$eC!MEm*6%H?eAu48osm1W8vOjl<6|`(4ZoLpS0QtT zAUCY{1o}94kfTXl&IQ6fS23|bP~n<7r^oYURKGHXdQO$#oRyh$b27}h-*8(FPxNXu z?7Zb|_Ro`;bXB-N;d8&}mRRG0qD<)ihRUa(Gk?8rgH$pI_7QZMe4 zeD@Df!fUh@FQzotV`n(?^Ye_o>>#JV81dFKXJno}eezOkqC+~L`D_Z6)k7^Hosg9@w6A3|p2GlzZ zT$Xg;9;lbh3F~-$tZ?7Clmim&|Io|aSS@{~qIn-PQ9QlF1?i<3IzujJ36mr+I8jH( zpVl(k4DAX&<|9Ej4sep+aY|P?tl-7W`cpWH;=xq2 z^1M};X=PyKFo0_jKO@!E(V6o(pY#AlTBF$z2iCY{7`^Z6N1 zbgPKb@R;y6MEqJE5K#bhse9i0DmFEh$#@LuaTa)pQ>*_|64^JJI4mQ1k;Y7AKft_K zgv)&d0ijZlB}VPqGPd4CyoZQSZ*bnN!m~vt-XUzLbXQk{?B2ifdq9r|C-4$+j3V7? 
zQufy?FFAm=6~_}dyBw8k*& z2cYbctjwd{(pgykL&Oj2C zicX;)d>kkK*j(+;dmt`0Rf!A`wzRSNa{`K z5d7hLpHNdT?JJ%o2J3jmvIK6kVS308q|j}SSDrxFjDem%v$u)cTnu);HQ00X8@2IA zsBB2Sc@xyufE_8P6VmLPP;zZW!Zm_5q~W}0A%PS z&t^uAwmlhWvGZsvJ&oXCGlI!T*o?0xho~c-V3X?9xDhU4#qt>U3@29s2v0&{R{S=s zvI}OO1Nok@Xw}{yEY9*ou zLvEYQ!1v}zbpU<0xK5HA7#(hIJY z!ziE!KDiM@`ai$3#*L4W%=BP8=lpvTS7=gfk>kF^V@%Saa0c4v&nzGBr*D&k9bTRC z5zoz1-%q8V@BNpFP@T>RHCrr8B3k%VK!JgV-Fj%adfIdvhP;*2W6u~yi{j$zGu{x_x^o*^etNtGz1@r5Ej2RZQ_9q=HS)yL8n+qjB z&BX1IcA-Ccz5OG7;YW0YzYWJV!(|lDHGVk(n*0A?&-}Ns)E1#BX@M;Gov{?e|M{8@y;MUDJk= z88#snUJO8RH>O;ioo!8X*KjPYtyQxZ{RBLw0;<$NZg?8P_=x6w;-6YM>`oYYUC_~q zDL^7EPro<~$+S@O@H9&r`_btS* zR%pkSuC6YlZ`g0(1qs8vD=fp=Vdkl;n{ko0)zsuvj_#?TYLGRYsjSN5c}UHNQE(4= z+3&`FgD=isQdNDwrGfs~>g69+!3*SwGhNUqe0Bw$Zl-ffiG z%rU=(FC4T6-yb}F(8?~l8j)dl#@nmp6d^QVQ@v>Xv3|~3L2?C zm)^6#=9L@P8_rxnBw}+Qk#i-;H$`d4-Oi ze)XS%F33nXp3cj&96WdhL5t})eDnp@i^XHmVTktdHb#q6QW`GRzw3VIKk$UalW$)W zr~Iq^C>~;H$kKF__BTs0H*s8W5NV6e>CR)Q_LG&8?eYgA{TZ@oK*h0aA7g6Fi-;sj zbFVVtP_fXMo~roT(W9Y)L5xpDpIc!?UhN;j5sBoSjAGM{1*<5121Ks$U|wci)#2+t zai3PJijCz+g+gwBQG5HZuU-h=(|h*+K}`8glI6eSJR)oG2TZ1*FV zP~;6Y9*Hf_9%*oxq4PQ3(i;T49OKPP#SR{wz#<}WmnL94- z%=8j8LI_|QJ^#b$-8oNIdm}Eb=XKOQ24ILTa6uCwNQ3&fy*-0y#M!2=lwzuF%F#s*#21yXhoKW0zQAcY`> zVpoCUpC}_E*hVETM>ah{4&-tBP$okbq^V~vzl&kK+n6EL!=`j-%6Nk@QjSI zzXKOpETp3DN+NVJfoLo$$sc17y}jvj4B+C7F!y50?t6gT0JeV%TclIj2ti&;A>Q_c zZGseXw6X1hk3UxC!2S%l0sb4P@=~8K11ONcsSq%L3V>Yveh1)iy^(x`(V7y+CN9yL z{qdVtx`zZMfSw2-;BHhb)awTW0~JQvEJ+!3F;~uCtE2F%O?ZLZ&&tP#2f#J)=0TtU z42$y;^Ehc-8r}f2y$nP?;E+{0ox`HT+hdKpp9J`~EVkv*W5bDHD!T0Q2Ly`QAD^QD zMOr!+zCep5D@%+DH}gOE9Do7y`=$Q=zySjS_C&!xY>;LEAMe{7eP#8SVdJhI$|x6M z|7f8c98kib6l6t7tG2@NYpGZUquK=!r~9tq;ZW02TMMzQp0m=vxCc*#4-yUbhY6mk zNZW@on3MM4&hzcKm+cl##N5h8B8fIWA)V6Lq%q{FtCoWd-x{+<5U3_t6qNW|ivz84 z9R+NJ@6TXY>c0|pegj|rldofFxb$tx5Qz4NX8k@7Fl2fMC|JxJgyBkZ07_QVAB|zb zx8)v;sqW&kn1Gyl1p+b?zj${!G3e+_KXfFFame;%MuJ%FZ0To_1v1|_F3rs6LUxnL z6YG}{{(>W!XD%M|vOh_TMpk}=g-0X_spvcy$w;KKm|gN0_TR*LYGSD7@A>P`+4e6m 
zmnJ7ZpmzM09R7(l`iDL+G^`JfXjN7-w^I}aHBBPYKY2A}V}(ED7U_(flJN(rT22^t z_NZz!8RZq_?SCr90W_U%KtmvtSuYrFmAvr1`VR&tQq1bHX`dyALXqwIo_)h&Zu22Q0|rsj&MDmzdu2M%=v%++QD9`bL+Pa z&_r=g1B{ZH%e6m}=tGi_PXoSCe09N&CM8iGZ znF$3=Aou+y`Tu5;BOqEI4@o`@Z_q5oav5Z`E`cGagjq` z1}{#A<{gogb2wl5oh3*50th}x%$%w;GYM*gpXYD75*8jX@mPPGczCbMdObC6dtt~B`E$Q|zf6rrHac9NZGm!aW@+-rl6rCNzRyt>w6uH%;FrCe&(S z!8!{y%NFd1)+vD`;m5ePRi zEF?NGIfYyQ^dxVzTEd_Zl(oB(>Yr6VPt!7TafL)YT$tO{b3qcuUf3mo!@g2yY27JkGYJ_ z=8ZJ{Vt00D8`Ve9iTNmTE-zEzcoY#n&qbjiha-F~if^@L%9KvIXxGJp<-nEPX%LSehEdGjmdeN65PeFTld0_4e?90NMnuJ6|X_zFB zLxDGLJeK_RQ(+Dr$rG1VJ0AGdTOeQAs?`T3nv+cZ4-}f9Dk64UVs!Wd;vNU>P2mk7*Rpu}~tGHN%$|DY|z*8K{4=!8j} z^02Gg-hW=z{!QHcBLnZ@!zIn5nVo=Lp52KZGDq(J;$U7ri1HAywT!w*_o#wW%%|?A=cv2|JGi9-!lIr zTLiRDfGk0{qqV1N{iJAY2b~~+$pg3kB0Szz;)e{~BLOHs;~n1#SytT(NDQ?GN4U3s z_oma^+=0P*52Rb(A4d8u@;!9A=5-DjMzox?enV_E{x&vC-|sk5mN17? z)!1P`Wu-rvR+(0#xa`&6XSkFeJFGZ>$lfT+OcNb#HI#$nJB;d@HgCm36uYL30 z@`wNP34$$TGDvK9C{m7an}dp<6^HL$-{ycVQx$xA|H`$2?d8vA|KExtV2luE+j%pV zEj>4wgpt*i)>6U`_;Ry=7xrh^!NzV3fAa7$EKe?=PtaM03g91%M+mFUDt(ILCbJj_|GRE zsFT-FLpiE4lcd+-n|){LuMP4P)l!{zen!84JziPDsJW_9V0PzXQ8dafx2pDAa_~6* zU@Ak>TYJJJqN1P91JmkhD`Y|96KYM&4iXoljx|1sDy(kAB(I>54KqB>S>d}5< zUSo8dp$fyMQO)n==UZ-UeFoUH&sux%mH1Ii4W@Nr6K|C`7WgdU3l8(OY_hs?Jk6{@ z=Rp}7g=1dI07}II3hToseuEnW-Fw;0RD4y=&dv_M+Yph0Q155ozxJx1G`W=$YRaB; zc^7~K#W`9FpjmfV`+W6yzw{j)sy5rc z_*lN8?Id@}-P}N)xz#^zl0z%OOS-O7HB24jn3yq@0eS*TvSxz4n(hf^WCG*mk?YQb z4fv0?HK1i(M(Vrn-!5CZe{}@^QmV+<@t%{e`ZZJt3_NG#VkiuZFF7VEwp@Lk&_wR=m`8!!>uu20x)o(-bdJG*Yo># zH$dL;itbGcHNHGP`|7m0?zT@3$;hCfp?@3@iqQ?7t^JV9xh-~N_c+jUQo>*)x7m2L zhq*r@fNXc}abO7(-<;Dg6jBJCTmKOa2M6NfK*n%PDH%UO%*&*Zv{SL+S>hmibvK)F zU!_6sijrc-GqWWgn+4J+Bs9SnKDWqgB7*t#kynoMb(}qs&!^J0jhA9CVmCG*@9Qcn zZ_TYACLYvoYkG&I4C2sw>ooF-_PC!Gdnki=khPk6KH5x3{kw}ow4+OaN=iWE&8lZv zV#hy?bXiKyLeqR|8%jHB(M0Af&7=cy{J=Rk_IRzW$DfH`mfcjY@rQnm9a~Fqs<FOP|uPl2sUws73v+dYjw)Y1B^W(0h*40B*${3ni))IjKw7H+hyZWOp z56gtWyoG+P9qp`_;1~VGo4nJ=>ApsDDVL?_&HDGxWiZP$yq((de5$Tqd&Nz7W2oBJ 
zAkqZUzdFx=AzKgmUM;%B0@B2k?_##q==U9bGyH+h_6aWN6q)E7j}SjZzNAP9xIDfW zM;gM8R|2d|@>9Y`1B`JT18l1Un83zw0kpU$QOXLI+i5JN=KjKnRXBL0w6S0lRGCdP;o5PEK% z$nD=I6mS~B-x>gqG;XVEY)n3oNfHzgfYEQy5>2|pjJebnY^c$6jrQy3D8A>}EzQ7j zI1U09*1_QNJ`m(MUG{b@qZbU8+7$9;o4kdVmphkg?2U7YxvekJpDEmerl1cC#>7`1 zyKJ&5n;X*ARWZABJ0YDWe)z{+UY5607lub`d#^Nk)h7|+9s+iN3vDv_4)vrtdMuv>wa|}humhb{HwEWl%g76-y(}`O9qX7l!0_Y z25UR3?idV?lgs6S4AzmStvy@g`e-_}KHk(-Ru}VL6Idg+)0GR5A{s^#qut?qa2PIU z$|s(Q@WIuZx@*lB-q2ZFvfZ|TCV;q;=M`!7a+J4YxkJWBCwkWCR)MD4waLsesT@`A zBRCf99H=wJKUVxgsMc<5Ay^EmQ$xDi_K-?%kMl$$YyC`CNLy|uR4%36hbbX6*~!s_ z1U`f@KO>QYyTgEJdzWZc)bTCab4h|Ir=%PixjE0TOtYNJ>ovr?yP?+m<>~BDD0p%- zZq*{|8*lO-w8DYmlWC6*Y{Fh;t+a(uF!ga}#gGT=Fp({`r5L#0ykEO+A#l}rml(gb6Qo=jQOJ zXkC0k?WY&n{rqw;myGsH*QuB~i?1pxdS-3Ja(uNo-m85?G;IuhU)t<_QI71Xo5K{o45wfFn`U>sHeNb2Oq76 z8ucXj8gxZsbS%v`ULp6Tow2@sD;RrR>nPOq@kN&5QrjT)Tf>vB(vAxS*=wAxs4EPm z-TkH8kKoGC{z2@9I%<{6`f{S;B;0H{J$hKvZzuF|nmyw-+sGK;A5-NK$7BknfsFF=(ah;&2rd_f0t^W`~_QPwc)DD#X#E+Skvo$~A0!uqkX$^rbl718Uf z-BZ^XC*SCsuAj{oT~%Wh0=o)Kve$jVy;wFpZMSY2OZ&&-a;46XaE|q2{TN`;20qX-(y~3Jnb10YV%8&QVBpzK?r~nYye2wE04`G zqWLl|N6lW!X=Ucx7b9*b`I4p#T0EdFv~cjQ$^|tsJ-kpsm}tpm+X*I5ArXTWtd}EL zIX`CNl*xjMmiLjeoUc`=@CSoa;+3_2xAZ}&kWKd2GRNW`5>MZ3KW6o96_rk-u)qP7 zZl$ii@Au&!PAq9~h%SYf%$7*DB(O8*?is}x7;GJw9pz`XPDwvw;c#xQ6=YoW+d{!* z_++`Q-ix;LbkJ4Nr_Utj_0NmKILL%N)WL)KpOVhCVE&rjo5wvUXUD8>T{dZq3 z-(NRJVc;pP(#)JA$ZkubhXkA@HjjzjS7vq@&yuPgkIfLI3C6HL6upmK5P9uX^ zWKXdmk$4()$7ud2GgaO(s$ZHCt~WM(SqDGgQJ)Y!MnL4>PmhHlB7nE*Pi}a1D!6>i ztanYQq^5gy@=%E57=JMKx=4*`k<@>h#y8V6h|s-7e|r+$LGs?vh2By5a}U4Oi(Uc$Rwn4msHTw7 z7Hfv21R^G?6$d&$)ng6Y+FH`PXf%|)w5Gs0EaddhEw}Rb>oVM{#0Xjsx8-!lD^ITCK^KMi+!uu;QqW+f zd<%kDg=TqH(hhDCTt7%hrx2z9ma8@paR2pd18@gvaZxd496-mW+NNMp^0SDN>yPUS z#zjHfD8dmQa#xU*@oAtjcD8QJEtg+vpadq{GlKw&^xJo-iP1nxUpCZ69$|k5fw-BY zmfPge;*(+p#}^DjbPUGeCuPi8ywg6z(m!0vu3mPZHcO>PmZ3PX98PYsi^X`QGd5Oh z9Z}x?8;@~X3_K|w&4JK%W(9Z9R6_j_Uq!{^7*8C?26Fa2q=;tLB2p7-fU!^=r384# zYj3x4?JnEi(^J{+Ow;(oj}|mPd*#1PRZ)r5s{)^xchpyiNw4nnL>W0*!t8E1=YVb1 
zuld+ft2OFzPOe}9lASkP)gYJK06!^jr+@P$j=YL$6#HTP_mF^q7`v74_bjfCH${|U zPHF`auB>g)#Vs}sXw52nHxSB7^Vqj{3eOdk^-r$gb z`NiH>kBayD!PDiGzxjPMe61oRAD5fQ;HR?4EHT5y!s65 zMB^a1NrgpRgxl0X3v?Yvzjde6iTJ#611RJWYj${2dJ|l2dxxw-7kpa3*c?207G%)ZO;=eipuW-H?8H-fCgY&!gJPKwvR(ieJ$;?x~>H$9Es&> z;r1giNA2_O+>L_9DRkS&aC+0;RufL#c}olwN))E z%(_z7Ej<0+k>v@mM+%7Smqhq}Bx+CJGTav-AB7f_^Y!Z z|5o2k#b%)zK8Gf8SXXa5{utPP|5_h(++zLw&`9QMHuXjqK`BTf&ptE+JoOMngCID3 zv0UZBPxtt4RL7lRNx%#uVE-^Fq5wXin9AAiL&?aaL7MxCX2)=FVxmTrHVd`S$PI^= zpTILh5Fn3A^y>XV3AfO5amdhOPsVjxLrCbC(02|<4Y|r94~i%U-_rQWH{n(sYK_bB z7liBY$&HtgOql9uBt=8adZu8>VJ5yGnbFm{Df4(&?!!%b`9*o^g-3LUszP*tZPTXx zjIZ4(kx_D5eSkIN7=I^XRnJAoYJsVd`-BdvbkmdvLawyH%=*49ERz4ksm}Fv8L>ls z1@hD4A3WD!c#Zj}wDESnrp$JG|=#(?GB8}2yYNHn&n_ij_ zegWc-N&2rzDZ@6~Q2^C@jzu%uq#hg|A%i7!^#FVQINHeUy^;69{i6U9la3*q_cD1N zqmM!c*!4fsZ~J28*6U|3@YzQQ*E@m#BIEeMRMMRLm7Dvz3OJ&|tgU~@Le&yR^&j0|3>OrGkow}`EaxEmod(DUy1c!cR#?N2sjU1WQoIa z4u!_2cQgy`AOn)o*O+>9*VRt-bM2;lGv#a;AB#kbPtz$H)-=N+QJQzXm9l`bCLj(p z^$kg?z=Z;CXQL0wpUiL!88>rHX_kQ3IYdsb|92X9{zx)LtG;y1%k<>r;fDF%Z*!IhPe*B5y;cyU?cXDw7 zkjINQz~M(+{&X<`0lX$VSlsA+F<@>uCFAc!N6)K!Vx%-9t&x347#rvAaUTR>jSkO# zp*F7rm^7ro$?5E7Ld*!uzZ?(E?$ydnh8r|NGVPyl`yWlf-w6(Hh1PCI3 z0d7CA$Tq!wrJbEC(vk)wOLL^Q={F-Z;(n|X z>Sy_w-)iSw&iggZ{DJG?rP@mLfcFyiSdx4n`42CpuXes=P?!F!G9!T@5p^djt1_?i zIP|!=gb0(i#>cdU{kJvy=IsdMlM;=4zB%ftM{k-PjVp+xN$CgaPkFAgTDx_SloaqJ7Sc0GUCgSguUu|k zqp?Z6UC(q^2tSF;FRr1vIWpJYMS23=V#U*!&fK~5KC5+G0P~TwR|z&3#*)U}O_=K= zM;^6q^xQQgu;0!ksn7dHlx*JZ{yg5i<2LwN--YmH!>3$zTMniX0~x+Zv!0YnB3j&B z0}&v=DWhwVP{2GGlP?kq$hTvg%#4gl4m-rG7!BM@}J1xFK5b?5kbc>sFt z1Uz={enD7G-;|84b0y5y8t%XQpwZ}(kpSJrRP%(*`LLO7hyp>$yjjp1fW-qfE57Dd zTuV=v2$*w5)6W=R&2k3R%h9N~IiC={V!11RnilFU+G;`F&kOZ_(4HJlK%^LUuG#&B*;y}Tb{*F-7i8z(@t+lG zuj!0L%$iawMEAK?_aEE;_%h^b#dZJwAcr@J`)p}_26pZMGq=UA%TK-I<3LDC1;P!2 zBdgu@1MyW-;kOop0?|s2jbz60YDOy8N`$Vd?t0}Ur#~)t*B3P{p4qG^BIet6DERO~ zdS-)6l(S6dpx$iqma`M<2EH+)c{|vf^hR5P9qVMSNtLw)dhc<(n|4N;3qi>L;^h{@ z@_GVvrGa9j=TdzaNoS{n#G$4a6612=vP@Z{Q}E%yR)Jn9VaPS&;zd 
zAhpLrpV@<4mv{;wAa1Uck(+`7o97m@CSKOlWTc!J zU)_QX9nTrD z{G*3rlexYpzY^od#PL&=b$PHBMXf(xO;_nCVA1v6+`-AY)-70#dyK+XB*BzJ;ZT9+ zW~cy#K}oRT!NS9#vslEcvN!mC>zIj`(pYa zzW>p!+0#>ouYg}|a)OGVP3F?wq(`sW{V^PzNR9n=^l|4Y=!yG-lLEP8n|KPkB1`>H zE@L-WJXdS(Txm%s@N~+&*O_Ravo{M*pE-r7GM9ia_YN}6yoG`X>zjR^lq-CZ2g#@) zA-f}ZJBTc9DEx{kB*&FHvwkrUUy+BybxrnqKx*kwLQ5^4iC95~{3U0(3GJkI1i)DP&@OCKSB zvzzw<O8t2DgG{Y1?{7;lD_yPQE z&aYW_pq(de;s&80-Um<<3N9wS%h%cwc+4-fJVMgT?@k_bp$vg0`3o=xizBT9Bp#@k zS~Iu=Z1az`IeY=tTp39Zux+HLkBAGlI$vbJD_^EWf3GH(M`b!b<+6dAXwCn2G-->h z7ammrrmeo0IjN&9{-i>QzHuHiI2-2gKiV1o8W+xY2V?MO@bJOQ{tKc=)^6L7Jc4Il zUwKX#*m-!!#M)m&w_8zQpue_-CW7!=IAtW3q3}I+It1TKbMo869|rbaSIDyQ0OId< zT~9lBZ7Wxg0PlD(@JtMmtBfIs@_s6Z1nmQK#uiXwJU2W>gu?+P>mNM4T<{ocIDXd$ zfpGA&msbk!!@`b{Rv|r8lUgU_J_jU*F&+V&>uB$|H zWyNPh0e|y7EA~e_ogl31NJLbjm@JCeN1*s@z4xy#ahgq6Ygx0pd42`=AwD zQ~24fqISKstAZ|QUSI@ER8*a!(`KcohA_@l<&5US3ckav_H0N2&Oez7*a0Vri9mPvn$?fuTdukdh9 z1elCikfnH6N*>FI>uH5&X~PgIBRu|SH3*0Nb2tRw)n|~!G5AD6P;v)|gvG!;c#}H# zjDy^t1(=DA2*x-`DCy_>SHjS#brdT=sig^dft$)Tu^dk(CYSbed8+wp)g0FC&VT*j zK`PUZ{T5s|l`@Mg--p)F1!U>x#E#c}IqdBYQ!q9Na{7p@ z1*8v5`VlRy;Ux&{P!S7QXSt$<4BLqq!;Ao8&B>w0{2`IPpLs)HSpb# zolb)j63h#;zes}eLJS;M@tAEB7i3Jo)gZ)|#*hpahxL+ISIpqz4lFo;@~su#}2kO;nEzJxC!}M^aLVdGcCC6;g?{6cIcL~>SchV#Cj}W^Mk?{ zrr%lC3DKCPv{Gy38R=s6m)tX(_8VG|ME(0-;LGy55>_1?Um)=+r0MSpa!cwR1y8<- z_;xr_-+QnjFT}jF&p(=L#$e!?H@7M)M7#}k=^NIm->%;-BlF=Zv2Qt{pmaF>zTN{$ zqtxCmF)3L;x3ZiCuFATYpCFOZzyogH*$7zd+y z-`>%5i5@~9jWw?><)pu)QR8>I`Yb6{%${q2t8E+I!Dwz_yaVaHS{A}mLU|~;uI;v+ z6iYV8j^+L|s9>K6J^{?1=x*KZKi6~VI;RMZL0?gHdz!h=0y?;VzxBbpHfC?})=F|E z6iq)W!Ui;RKn9%lVV5~B!N6SvjrfUiUR27YPB$D8r;x=8^@%d7t<+!qZsVT1q(ub| z-y6Q29tmd-f|ghc+*7xO@0(`W*@))6fJ~v_>2RF?Xf7{Ja;MZj{O{=@k{uG&YZ0-^fK(XR00NJiomZ{B-XxQ)r=^ z#pi|N1XYq@LhYucXGTMhk8hEeuQK#xbz*M6n~s#-;xx6~BJ$4d{8iusrIWxn@E|Wo zi>!L~_4IgA1?}x-vR&ghT_~N^O|NbuZtVDL0Ur)yiU){u+w;OAO{V%o`R-T^21O#z zfP~bqsRWhd!n3fv0-GVzTBL9J)|}q2QhGgq3;?ppZ%Z4uuM4LMYL~63nJYJs^0GH5 zNq2Rmas^;9x5b3(zm{z(=AS@6D0rx3yFCCzAQ1YMbuK;pCkzelpI33L0tkUc`6XEh 
z6ciLt7g`(<;l#`LIP3fO??TT&PdPo!0tMZU4-d0?zXG~LwmvK~8=Eaiw|J1$?+m=E zm*Z!AaeLfAvy3k`_^<~g7*N!SN75I*oleC?? zSy~s%v$3&hI+56VIK( z1Jl>L$!l{naK1rlAP)FwTB`%tHTr-n)Wk-6jf5Bt$T98@;(o2$0ALx|p&0-M5Kt%G z65s-S@DoErIK2%Z&mtmQFBFIw8^?kW6nQZdbu1846uF+rFDdm5)Fdbf2(!uD)SNQi z3Ii!LCV;+5-zUnc?DPRDIK${bgj8YZDOD_N(nd|5RlK#u?!t0|CsP90GaAo9AzEUy z0CUY+m7Cw9Z&+Wbbsm7G91x@NPi!mf$GJ2F)cJ$aIm9V}G_H`uowmF@eW#-0F~98v z!^gFBD0z*(_ho(m^Hhg%awWPq9V(Ivq5*b4EjSqrP{bMc;)Q>t$2ul>SBC37k4$ky z2xeJwwaSACLil%_QS)Ea9gJdt#)rjei9lTT=KC|pt(%F@4_X)-P)R8k^rkSW}l z=Mm}IKL7oxPh*l{z8l zSm~abL!(H~S6!v{q&@Mem9Y-y2E%kaMSU$ z_|N!{8zf1kEA(4^k6u%&paa+7i}Ve!>I5+!%z)2kd>RZ0h{aZyiV#Qy`1zpbPihr; zisAS2Mj0p?<1XGg6_fyV4<-`<5fNq_aCCbX07U%n9xG^g1PoJhP!h3wDY+IjjcL2= zyX^m0#xB5L!A$=kQjdoC?K4sJtMMNT4NAv?Ox63mpK>3NG2Y?^Nf8)| z>9fPm*zZ8_=Jl$FGRdCz0UU^?WRd^lTI!RvcKdYRwb(rO4sstC4Mj%(z@j7$eLMa?gIS|M;vrxakMaIxC6U#Y&7 zn=zQ0`Qjlz4!>R@mf`pOc|g*t-PePlfR3?h%4Y&#O(fZS7*J=ikO&}BKsF1jq$ISbB3pn zUwu5^d+BAumP}fdyry%_GtkJV3FWW?&5q0z2yjeu^z(zV`n+HI@}FmSggt#H&kBH) zp=Em;Rfox9nHAY`saGIhLjT~S$-1ci9R{VO4{%1MyK3NcMwf=k13&cpYcw1>cYExP z2(sYRyb&ldF{!4hQj;Cz|3RniHb@L15{5G z3>cvA#-%|njlZD>n3d12nJc~$(0y8k4^Z8P6$LSTC=;QkCZakfiQJR^Ovi8WsDUcU zSJayqeF00URX^r7vZ(VH8C)6XE}NLy3HCj{V>Fb3HjtSRVH3n)^tl?ld(x*Qq^Hg9 zwsQjBc0h@z9-vYZ8qHnQcEp1ZO7#%uMv~A*FF^u{7E?GpqeXs-3sjxxustgCZuSA9;6Pgl?`m4;brHR0Mx>(+?d8Wj_%RR+!Gu0@AjSdk$YZ0sk29EM zH-nVseY5Z!O!5J^#9;V08FvSO-VozA-fPCM)YoVu_juEBGBFTjU^{6U#(SO>qdy3kV zMK5)%`!OF)1Y1+9pUKGe;C6mgtaqfR|DzoaKlCQoX<82g#6yTF!#EI&uB;y`p(BCs zMaip?qq5xB+X;Mn#RemU0RaIa+1-x!`3wA1>Ez464dg?l=nb+~rFAO^D>D&9MtFAM zD}jEG6!P8}&R+!zt~5&nX?|(Uznn#XahJggLi8ZP;l_xQ=)fuK?c3Z&FaJg_nE3FL;Ap#CxLD$# z!fC;Ur;fOqzH3r?L+;tBc;AJK=^fz(nqsRSj3i97?v@U4V|+6t-n1-u5PG5Bj)I9% zr`^>1Bs&>ew2QFbs2h;rkl=I~(MM=@Ky`b;Hh*qO44^=f;OfIOAooO!_Zh9|(y*oU zeUl0z8M@az7J27J%a}$ zAv1ZL=}=#UB~zb98yN>6?V-^Tqib#72<4>$vJg1MQlz%YNV${%o+}e@Cp^N-$}>CP z2m~n{cMjcQb>EZ^iCsDJM=RI?b9YJUT=zINsmFlYN(|_;cxxFJJol(zFFs+Ku}a&D znUy%sU~e#TF{8Mj5XxNukvF}dU#9;eEewV?K=mB-T;%)X0GcqS+6eXtgbcJutU^Yl`#Hh?(b 
zE`ZH16L|m({jaVOU#ghmd!C)fcjec4SnwtK5+`AoSTb6s7g`@0j2K%)O}WS1msP)@i0lu`g1}MI8AJHX9suj+vG3_4sRyQbAxhpDM8vdB z&HT5im11{5`hemHz|l{f&~z9Gxg1w_z&TIIwiRyB6>GbRtc#SvTlt~ARI ziYu^y78+MNT?I2TY;AxU12{79-VFEa)zSO-m;>W#dx&c8O7aNl6RUX^bBL>3ft~M2 zih;5E-l2OPQ5@-{9X;s%dnWCAFV*Q9ussEVfS(6~a^65B)Q$Uae+gqZ)ShB?qBqOc zY54h@BA|+&f<$?sZ}}^T05NbO*Le3v_*6x0R3xEsrdc6QuTPGiqu}ekgEB>8#ya$a z^G?gH;wz)1qtQ;Q>X}7S#KV`D=+K@O#m&r}XJfWQM9!A}=_f=6e&>xkW_Bh#LhilHyRmvWche~HC zMGnmpIncKml>66moO}AA9qy=a>nD`rU(fO$b^lfbv+HvRFZu$Ewr{6Wtb1#t-AfK%j(GX|PF$ zQX_aJv7;vSxxrXP_xHO0>3w-VPjekB0@8RQ2y`yE6Prsn#nWfM$flv6XIcXkIK168 zUb+H8{QI9A)#o4V3HY`&cwP<;4pMWB6~(Y-HgQxz6>_Q1AN+ic!x}cI`XTTs-rf%s zduSgBTyQ`BxXt7k2Y?&ecK=A>E`B73i<_vk`X0*=6hHN_zeMR(ebNV>fs)dm??aO= z?ZWWQbhnfGjo%)Xw=WN*wY30zNkj=5Lt76HoNlF3Qts}T7TFOI*t~pvJwz#x@2#F= z-eOo5t$q zoN|5Sb{tA!Xxkb0`0M&C6p|g`Lcrx85`jXjnxau+eNaBE-^x0vHftW;p_KW2^yvxB-ndy903Z6Z3m@@8}P29K`aa@jb0`u zV35n$fYQ?s-23+~$;=(UG|f}%ukq{8a4z#yb`0`_xHW#nwVZzzks>HFo_^3IhhMbLnJ ztkhpJv{T%Bsb&ZOJi{k(785EK)7>f2k)!4Jc!KUiaBG0T%LG)GVa7H2I45-WD$F)` z`HZs(jm)aus}LZgjC2lJ-@yIul>&ALQ~#TJY2GZ;)&h(3VBt<4JN5kf^1I9}khKIB zlhBG|B)*_4Ej_sUa5cLy>#lTUMiY3_84F2UqnI}%3!coDasdVBHrssUVwCb!$WmJ# z&--wtrVXBlzzm*H3OqEDZ{EafCxGf!ND`NwRjUL8*wTEw^)t7LK{I+6VCn-k%P
(*k+{$wu25BS`d6#Cd=BBfkSO?~3IyTy2cG>}`7#5}0Z{Buh# z```H1^+RGyCvN)Pw|I$*_7Z_3Hh9WRY@#@*MwT-r_%Wpp%L%uY5$tNDo$19(-4$fq zY%TVj96HrOzzkBR_-0&kn+>L)dVz(Gg#(QtZ4bvOlHxhp_&`W!rS*a@V5#M~gM!{X zRG*fHiv$3yWv0+N%)^yrS|E2ehadxCZhz{tXC@CoCvUgs^}rDR3*fs+fcf~z+Fe9$ zL_~l2r01#3%o2t#N{_NsFrf0_t8-%g=B7_MS>$f*t zqj4FInk7v)ZCMF9$gX|-rH@*DZ zk6`;Q3TWzk;9;1Y27O5WZW;R5km>Jf0E6R0Aswq|_eCJJ6JVo9hXMQg0eHG0ih10w8#K^&%x8q$OgpvaH|h!mSu8EnFK zvdZHbP4q z^ua^cKJ^*FJ=vb&z=By?wVtHa+JZ#d>U1tTD!Bx@#pa&T*1%I>89<~zdF zJ)jp`Y8e?STv~*gP$)yCZN^1SjeZ=iQ0foPJO*%oJluK=hGH`YOP?Ov(|`W@SWmvP zd4tdrq}=AaLHE=D^&*FU5HMJ;(tWxbzzWz6p8(FoZc`~8hN-ApOlY+1_W^ulW0^60 zpEiTRu2-oUt+P&*+jS0WEI&wcn;N5Ghdupt@67;nY*zq?)S)Nqy!BEqkpE7+wZ@@J zx0=*YMTv+YUd}9?`E)Jrx`f@?Yw=g8Hkt4BK$6g&>}&hdJT1Cf6~G^f=5`2OwRq=8 z1o6uH|MRj1t<2xuSOCNY456KsJ3=Kkw+QEs8Q7jCj^V5gW&&}uCQDa=a`zRQC@6@z znjDF$gplb-(P_}hp-K+#UTSxf(}z>4ws9*VWJz2Ma()*LLUUsgMPF*k#lrhU96Hgk zNusTx5H^@Cu2!X$w4;@O*mqOX+~=byHNZn0U&8lf%P(vm^n3qb7lR`I1pA_RDDYL6 zr>ml|PKvoS{f^K7dgswxP0l342&?o$O^%E8ml*}s;_m4R(=R2k5#gB^$A$lYx7?^> z8y^B9T+?^I*bp(C6hA*V`G0+?w!={|*d~03r<#&TnttYqIabRqzxD11nH6!_7b161 z1b_UomN)GG30VQpG@Cv|3>}@KPPyh8!-9eWkg-xc@3o=k&h`Df1y@8oY{t`^Vl_#3 zVK_+9v;1P-f~D~9c)tw#Ph7X@KIM+iA~Zoat=<{>6YHVW$c zuJOnDtxg5J->nAhdr#{sH(#p~x$L@H#U0C+k=1^4UO@#3rDywiSydiC-E}0pe-iZr z{5pi#fk5krf98ze>vBQr&-w8nG-w>E1}t_wpErL5Q*7t|^5%#6(TbJbdt{;_y*od6wV1_v0p+)thZ?=L^zs3BKA;Nu z6h^+bS+QjtEo0%4-e0`=BlPrUe5ZEuhfPME@qYV-_I^SCHUUQ4W5po5-K#J} zZ!`>U(gn|L*In1XQekghiUIq^ob(mpBE51fr4sDpQuu!CSO>@0b%jUngRuTNZ{!y@ zz^|F7D@h^}JLM^VDgzg27akCNSp!USCp)8nvouF!)vB5=QNPVEd`#=8`(^IOv(Y)2Ix%CV*(KO^Xq8p z1&A;u;sgsV3NjEh3T2ie;y{dabHMD|QSATu)L4xdn0fE77q^Yd$@MhlW_U7~gT0{G z^S8+)ooe#k{dl?YU@opom0MD8UOfVFVZbEaaOqQfWvI|MuN2lyWo_*%e$qw9kH^D5 z_0<39_V!#&OuhiR!{6tvil#k(l9|q1*217sELc56HGX$=toX_|Arzr4y*XPZQ=(#T zaCbOVacO!^wxb%83&TD9LB#i0(zLJ}rTxRp1y0lc&aVC&p&;yQy{M$~SL8xZn(BfZ z{9|Jq7rWz@%&aIPAtMP8$|}F$MqXOFwVe0&YHJT|jY>QtGB&x!O5nihJl>)kn)-Q? 
z9Y$OGV8}D9r8T&j-{DP^!0#K$>;g6d>4+m~2ZtzG%ogBBk=M(5F|HF?I$c^=aA=#& zNwP3i<_~-a2()y!)H{xjn&(SZwg!pkKO~y)t&GQ}`o|39UL+#%)g+ zyjEM(N9K>2h-yj|E@Q&;vp3`-1lUKDoD|WLxnpK0*gEh;c>4oSp4A|9wr+noI{t6N`x0B2O8MMg(_8|Lo>=)>T7DjXr7>F)}h z`62f*P2{^;cI(?GYdjel6#AQi+jVI4>dsD@4wS+4=^WFxUg(4G9iJJQ(zMD5Y=l2G zp-`F>KBGeX&ZFuTV_+ul?(R`p7<~UVQDf=@w=nH}6BCnMI{y!I2~D3tmY;r639}^%XOhEJ&j^d zo2B_(f1BpNK7PoccvagJQ15<;!JjC2Oa=UItRS`=p|eoO^YdI-SmUS9#>LE{#6N#} zH%Cx-E+-f3&s4&4=5Y^Me@n>8MU^`PpI*M!X?R$|ou%O~OREdL{0`(neJPF7FNQO? z+)sk}y}g*;^j+LJT=RZgD9Y{V9m~!0#zX(Y18r#Nmew5D6*x}6)VQTG-)4nf16L`( z?IPo0J$jV;>Gt)z*BAu4FT*=R^hCP?g}B&98ET5nzhdHyXio}tV__?zRa5N5Ah5SN zN&rz)?_;=+4d-zswu8YB<{tY1%y+v#Qru!1Z4D~ywhtTZkV00Fv=jG}FgwHH1HVn> z&n;C~#lO~K#T(2?7|j{UoOcEO#wDu6;=69xLq&bakTHRbYq_RUgQd-My?k?Jy_h&*Qg*Zv9m)* z$7s30N`;%hk&@K-!*7}j)f`h)UTFQ z+Eju470Pr5*`=}YWOOtfs=y3yA2GDbK093MfS_mkH-686t<3nfh70lsLRNzP;KQ`n3M z!-=ZKSxK1P*ta72bJ(!x*53TJzfaIm_U)^Hpc^9jjGK9*_bqn!nd%x;K>DGsR2u#) z9s-|H?=gWh0V0jKMGz)VSXqNXG3H34(~@y!Eo(Cp#mO&lLFU#_xQM@h*nu!`J04h{ zJ!f9GdhXX?&0pNge-MCaLc`9=Yn~EZg?rn8c?0f?hpI7@+w5iIqxELfdWNZIk2;b# zw&CJKIJ@h<=c`?Z+XFgX7rhj0Lnbh!P6_GTBNw9kFk6GoSM1tP`bFDIG9GUra6$JGC?C) z&*wqZpGfzVk(KKN_>}0VsP_^bKN;m^1`>TPI#xRyuN+<@6ye9HaynZ_Yrh&B%_^Cy zH#8mpYzfMm=i|~YgVPKEZts> ziSiza_Hdj8Y;dgu+#lLXnm>$8uwL5j_FNPqvh|h4<$A6`Vp{-H8sLEXH zxaCY+*{pZSXLFhg|vLchB{%7ftx|5P?`5r7>)ss=2b4~Ko4{4;$=1Sc6p zk+M3^4_iMgh$H84X#TGDz&@cOILe@Z?ng}{dW$5Vc0tLmOU^FlI|2hnLg#P0xL)#( z()Yu>JF$==&TF$Dpc71UYx7eswg|%q2Y6lI)!ij{iEX>2n~K>J7SpG91~`=ll0l3$-&gPmH>FR_ry$ z`A>uyB`3`&f(qFSuNwV_gO>6V3kHM?mX)BichioeaIW?}1y8ZK zFqt8LlxzOk!wlBSPegdL9z@_?lTE_E<9|&>Z5v|mpMYtw+l7XL@uU<9qf;^UBQx4@ zhbB2+KNS#CK6@vhFN;8$lJd}g5s(&tosCrI^C=3YIZI&|Q;zh<$M=Yb@D>&*^Ll4} z9;!A?0$AD*L24K3dw|uSyWRb>1{o&+^~=iI7aYZCD6GvP)Sb#$+`mujhY@hg+1btw z(h59$aQK0cnvG7iaEmO3{<*KCBSj$8;QzNxX}zpiRv8ZgqY^eZ_Q|V`pWxQhGL(%J z9y5zGQ5epP?eO)QE-)BN z!7BC%_{_WU?sNM2?AV4QNikGSD55{v373|T@F{ObN2}D)+Tz&3dxI;+$>L`+MLEI# zHSZw74Vcjh*PALgMtQpZECgn&d0vxyDTe3^v9ZBXKFn5eJueh^2%oM{;nt0aId<)* 
z6Xm%y(fknQr^>?b>F0`!lPccqNJwlqG<|W=0w#dNUhgF1#0|j&wZ}l@w z6};=LWgXV+rV)Mb%&hVTcBo|=J8L3hSL!7#w&20rL^&NNpXBm$hVKl9n9#S!8|?Bb z_DtHqda9L)c9TvhlLnM6I(p5qEVEn4UHb=)2A6fXJsotABi5|*wXSCQKFww9(R9A; zXyc6tRi)kStiG(+&HJVW(1K0+=iTZcO6ks2Om3H2P-2yfY!63bI3QU(r1b?WhRUX{ zfUMSFOzO!72O<`h?_9OEl%GBs(Qi_-u)H3B(UqJS4&WEYC2jj%Ow4?S%S@7zQbu|Q z2uUrW$c1oC<1^CO%h@84MuG{fhTx@_O3I*ZPg7U-;fneTzqyKwYbX zYiL^{CD=roHCX!(JJ)PJOs0VMF$!zxTUdjxUT)12@rPF{bFb?LpKm2~T6CAqmeB@K zi@B6WeR?>4u+2wqWkpk3=)d;qgHmKO&zw%=ps$-7`8aXGZ(+Cbif-YJ8McAnFST}S zwp$TNlr|d_!TL|`Ef2O^iCxvJ>+O~fnhpUa-i>rj+|tzCvfg*>hGt(}DMdzHt#!0Y zyGe%5i+Hflc}#>acXX*s=J@v>%dXC{^cHht8UV3DJ>);L=E zIG%DtJ1lxoJR3iq&Os0G&{7pSfg(9y4rC5*;o7bRC7q){XmUb*dKrHEBYu`}@Kl>a zWgbYQ$yrtG1Q3YN*gw*HlLWC$*7|#RgKI9NLJai<#QmphP(!r4Qn*w%`;vr|7E!VZ zA&A>xi@iQcMKPFNPqjC{9#t786p<~hh5rGiqMMhr)}o+iQ-2cR{Ly-UA8?apdlV{)3()(2xEeTiai`zNr5>n*wAS6jF+E zW_#Oi-66)WrC&|J^{zHX4^u;#xCe&)rL7qo3bM;Cr$ttLOr{t{CMk=i($ymY#3m{EyVE*%0y<{fls`Y+8Ois7_CiDpeN_Acs$fQ}L_Z7+dR->{7&Uk_+4( zqfC9y^U`cypS@XM1L?2FOjpS27))N-@zz3+dpSO14?7z(2cgcLx37Ce1@eNeu{*MN?V7uLy#06^gA73V(es5pj*0zqxb!&K; zz4c2{f6F%8fTN(0O=76VAd?&t-Kngt(wOOp4ni8S3LA3z(jIqnE2=_(jj^{!3KZh^ zx~g(@Y4keT&mu8ZRfPnFVW(Fh*Y}UBO;B@ZX9xNl=e&)SDOeOaLgqB|;=2w2f{1L2 zd<5`H9YJVjHXvVuBRujbCzxV=RJv9PoIZRkg^RF*3K6p%+&`#8esv!UYM0#z{e)HK z9G(S-WZyU`P4n@JHD2^becvp=CuQ^I1zIb-YaNpn zS4*0QbB~_ARS^JsRk7)-Lm6)KN)x@OARaKPVe!)t3jM&N3O0D&a`i**^m3Fn6JcR* z!G$o#8v&9%j*0{|4cd&>xp9=d)`7RLPM#Tuf9Kl^-|WNB;Hp1PmE~TjHuY%%@?d~p z7PGZw0{O;KdRNsB5atN-D&P7_M5Z~eT7d}|{OSsV6pfYx?3pjM{{AA3jQsPCu%t3% zjP2<%B2gStKw&|<^yWuJ$s}NIuI`6Hwy0tf5|Ef@c-J0s> zGqN9hlgD6jQ?6VY7X~rfK=eSCax*5lgTFETkCA>pqcZBe_-W&zG%&qHDcg#7#2b=7_KNR>FQ4^Qe0I(drOXc7LKJ7Peg@<)l1eiZI^#DD z-_+_KdJ$zHU;tvJbgiq3pvyWZH%}S;3mReC7oQM!U(QO4i+{A-VT#6-0$aJJ3bG%| zI12X9lf|IKGNY5!>QW@+Tv|YC{>h!M)u`fRuy{r97yPB?)u+k^1c0o9D0z#IKTX`# zBD(q8fSqA!CQJ5LEJ!o+gM?w0!x#|Tplwv710mx*ESrJase=Yyw`Ks;i?j@T_k&^9 z9R>kw+d${;{pYMPX=M&t&qpg}u*H2v zl0x8|riQ^(h6TPDY-@-@jXt@YU}6}dK7cy&zK_xKqKt9KS)6merLmG9ND0YV4F+Jb 
z7T|*kjn0-bTKe_nMdKK6_XVHzTy*J=-Pt%^MStS8wP27k{*&=NBuV_Knq&CphSm4p z7N4c~YCuTWh9X>JwNLN;>;_L%v;YDyNsXbSa6s8B|FGU;$!*h{$POR5-YF)yhy4hb z(L-OE61I&W(C8s3nYspCN&91ql1C(QzEn~Qoc(9?M-)PM5-o4Y`ovxO*+D89@DcMW11VX3q5Fd zzB%h*%+VVe$c#$Lxn(685MtP&;GrZjHS@i5FQOungz6G#int*;X!#hMtwyMu7Igt+&%eP4lP06Kz3UF9DfW%bP8?Ktfb+uHV(d%+OC~8XzEHOsP z`|=*=4)<>MBOA_Ui0D3hd+qK?eWq^lupW<0{icG!ve!e%H0ZlFjU4C@3d$)|z{#ct zem`1N2=zK3dVBv>(SpZ40%8QQ%qWZQ;SE#4Y?Q)Wi-T6O>R>E(BiO z26N%Jm)sUBao!va4FG-T{h)vv1n}F9`@MB@N;h)QiE{xzL>Rh%ybt7?q455b%X6g% zi=2a>dF&0$&MZ~mkb02|$^LF6OaEYi=K#E;ul2irw0=Zk^5Ei>q9q0Dz?cpEu$RP? z>hwPF;Gv|mH!|E%5ag9v$Aqa`ju?K#VZF_@F)O)HHUs*jhR2>BBD{FsE_>1lw5b>^ z?nix#ZH!`1b49*-P-|s^RK1b=q$$ZJ7|mZEO(1jjroEycA@lR==!r$Txll4q>rCoc zGF7{irm#7G%_2~mCaxOEnORz+*j<_ATnuH2&s3B3<=jI6$p1h@9F1U1o}fVKZrZ2- zO!7!?F>2Bjt)__|fY<;A9TJ(6{v>oG5E`G?8v)2YmT`2gJVpFEf@O5VU##o=(^DIG zWL7?}-*ZU5lZ_@`=1eT@$|ogB^z$9kDS(I!D_NP>&FMTq@un_6^ zcW8k-7s=0_OV~bLk`6RI(+FuUirMQz400E1?~o%N*>~%PlvCSMmoK{fI*jZ#h1)61 zz1jkZ+s@01RY_=F%*PPtmj$!g@~Rlw}i|V_%_}+d_YL+2C0hz z{Va)C-nU{1IIjwI63_$8M-CyG1&FUP3^(nMhefD?1Y}|`;anU~b1yYB@*4sfg0>(W zi}y(ddj9*c53ctfzTG6lGnod#tT+Je&+a&dlTGTh4YowhSqA*B7h(Cww!e=wEoT~3 zGf}dsc*V1vAzt)9Y^mc$KB7Dz(&bzbI;9QJeix02S2HpjN?oJ=;a!Y0V|srb>)@nL zP$*6Jrw=)7l?m1FYb9-c+jie+T1M|UWVPM@34h#ELg8GE`9iNle|m+VKS$o7|H-=A zF{>$axE0%S;+$g@)91^>xUOV$?bsoa!cf|X&YJjvb*}zSBX;}KW`~I2Yb`i!s(woE zo~x+(xA}NB|7eH#;4@MQ6JtSy4nnS^vVMrm% zckJ3{A5X_(p2OCy#G!4A>u23 zek@2}RToyxQj|CNRe#5oz`6KLC(sD-;9)abuV2>`dDQ|ytMy#LzddgMvbtS7 z11ILcTyKB=I35NC@!$W?<`;0;fBD;2#^iw95t!uu`mS#Yh4Be8ESa2^{YvDMLr@Ea+$~jsT8b0f^ujbr%I3sqrzuQS5*G1}cm)G}ont TD;OKapI4B5rYH#_p8Wm~v*H@q literal 0 HcmV?d00001 diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/CMakeLists.txt new file mode 100755 index 0000000000..96498624f9 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/CMakeLists.txt @@ -0,0 +1,11 @@ +set(CMAKE_CXX_COMPILER "dpcpp") + 
+cmake_minimum_required (VERSION 2.8) + +project(UseLibrary) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +add_subdirectory (src) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/License.txt b/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/License.txt new file mode 100755 index 0000000000..e63c6e13dc --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/License.txt @@ -0,0 +1,7 @@ +Copyright Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/README.md b/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/README.md new file mode 100755 index 0000000000..f713db02bb --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/README.md @@ -0,0 +1,126 @@ + +# Using FPGA Cross-Language Libraries +This FPGA tutorial demonstrates how to build DPC++ device libraries from various sources and use them in your DPC++ design. + +***Documentation***: The [oneAPI DPC++ FPGA Optimization Guide](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide) provides comprehensive instructions for targeting FPGAs through DPC++. The [oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide) is a general resource for target-independent DPC++ programming. + +| Optimized for | Description +--- |--- +| OS | Linux* Ubuntu* 18.04 +| Hardware | Intel® Programmable Acceleration Card (PAC) with Intel Arria® 10 GX FPGA;
Intel® Programmable Acceleration Card (PAC) with Intel Stratix® 10 SX FPGA +| Software | Intel® oneAPI DPC++ Compiler (Beta)
Intel® FPGA Add-On for oneAPI Base Toolkit +| What you will learn | How to create and use libraries in DPC++ FPGA projects
How power users can incorporate RTL source code in DPC++ for FPGA +| Time to complete | 15 minutes + +_Notice: The FPGA library feature is not yet supported in Windows*_ + +## Purpose +This FPGA tutorial demonstrates how to build DPC++ device libraries from multiple sources and use them in your DPC++ design. A library is useful for reusing and sharing code, or for separating code for testing purposes. Power users can also use libraries to leverage the features of other programming languages in their DPC++ FPGA designs. + +It is currently possible to generate FPGA library objects from the following source types: +* Verilog or VHDL (modules or entities, respectively) +* Intel® High Level Synthesis Compiler (HLS) functions +* OpenCL* 1.2 functions +* SYCL* or DPC++ functions + +This code sample uses libraries from all four supported source types within a single project. + +### Generating a library +To create a library from source code, use the following steps: + +1. `fpga_crossgen` creates an object file that contains representations for the target device (FPGA) and the FPGA emulator. The following commands instruct `fpga_crossgen` to generate DPC++ target objects from the four sources in this tutorial: + + ``` + fpga_crossgen lib_hls.cpp --source hls --target sycl -o lib_hls.o + fpga_crossgen lib_ocl.cl --source ocl --target sycl -o lib_ocl.o + fpga_crossgen lib_sycl.cpp --source sycl --target sycl -o lib_sycl.o + fpga_crossgen lib_rtl_spec.xml --emulation_model lib_rtl_model.cpp --target sycl -o lib_rtl.o + ``` + Notice that generating an RTL library requires that an `xml` file and emulation model be provided in addition to the Verilog source code. Examine the tutorial source code and the comments in `use_library.cpp` for more details. +2. `fpga_libtool` collects one or more objects into a DPC++ library archive file. 
This command creates a single library archive file from the four object files generated by `fpga_crossgen` in the previous step: + + ``` + fpga_libtool lib_hls.o lib_ocl.o lib_rtl.o lib_sycl.o --target sycl --create lib.a + ``` +### Using the library +To use the generated library in your project, simply add the generated library archive file to the list of input source files when invoking `dpcpp`. To compile the `use_library` tutorial, pass both `use_library.cpp` and `lib.a` as inputs. +``` +# Compile for FPGA emulator +dpcpp -fintelfpga use_library.cpp lib.a -o use_library.fpga_emu -DFPGA_EMULATOR + +# Compile for FPGA hardware +dpcpp -fintelfpga use_library.cpp lib.a -o use_library.fpga -Xshardware +``` + + +## Key Concepts +* How to create and use libraries in DPC++ FPGA projects +* How power users can incorporate RTL source code in DPC++ for FPGA + +## License +This code sample is licensed under the MIT license. + + +## Building the `use_library` Tutorial + +### Include Files +The included header `dpc_common.hpp` is located at `%ONEAPI_ROOT%\dev-utilities\latest\include` on your development system. + +### Running Samples in DevCloud +If running a sample in the Intel DevCloud, remember that you must specify the compute node (fpga_compile or fpga_runtime) as well as whether to run in batch or interactive mode. For more information see the Intel® oneAPI Base Toolkit Get Started Guide ([https://devcloud.intel.com/oneapi/get-started/base-toolkit/](https://devcloud.intel.com/oneapi/get-started/base-toolkit/)). + +When compiling for FPGA hardware, it is recommended to increase the job timeout to 12h. + +### On a Linux* System + +1. Generate the `Makefile` by running `cmake`. + ``` + mkdir build + cd build + ``` + To compile for the Intel® PAC with Intel Arria® 10 GX FPGA, run `cmake` using the command: + ``` + cmake .. + ``` + Alternatively, to compile for the Intel® PAC with Intel Stratix® 10 SX FPGA, run `cmake` using the command: + + ``` + cmake .. 
-DFPGA_BOARD=intel_s10sx_pac:pac_s10 + ``` + +2. Compile the design through the generated `Makefile`. The following build targets are provided, matching the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + make fpga_emu + ``` + * Generate the optimization report: + ``` + make report + ``` + * Compile for FPGA hardware (longer compile time, targets FPGA device): + ``` + make fpga + ``` +3. (Optional) As the above hardware compile may take several hours to complete, an Intel® PAC with Intel Arria® 10 GX FPGA precompiled binary can be downloaded here. + + ### In Third-Party Integrated Development Environments (IDEs) + +You can compile and run this tutorial in the Eclipse* IDE (in Linux*). For instructions, refer to the following link: [Intel® oneAPI DPC++ FPGA Workflows on Third-Party IDEs](https://software.intel.com/en-us/articles/intel-oneapi-dpcpp-fpga-workflow-on-ide) + + +## Running the Sample + + 1. Run the sample on the FPGA emulator (the kernel executes on the CPU): + ``` + ./use_library.fpga_emu (Linux) + ``` +2. Run the sample on the FPGA device: + ``` + ./use_library.fpga (Linux) + ``` + +### Example of Output +``` +PASSED: result is correct! 
+``` diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/sample.json b/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/sample.json new file mode 100755 index 0000000000..a9b38b95f2 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/sample.json @@ -0,0 +1,34 @@ +{ + "guid": "9605DCBF-6DDB-4FD2-812F-1ECF252AE334", + "name": "Using FPGA Cross-Language Libraries", + "categories": ["Toolkit/Intel® oneAPI Base Toolkit/FPGA/Tutorials"], + "description": "Tutorial demonstrating how to create FPGA libraries and to incorporate them in a DPC++ project", + "toolchain": ["dpcpp"], + "os": ["linux"], + "targetDevice": ["FPGA"], + "builder": ["cmake"], + "languages": [{"cpp":{}}], + "ciTests": { + "linux": [ + { + "id": "fpga_emu", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make fpga_emu", + "./use_library.fpga_emu" + ] + }, + { + "id": "report", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make report" + ] + } + ] + } +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/CMakeLists.txt new file mode 100755 index 0000000000..0f6889708b --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/CMakeLists.txt @@ -0,0 +1,133 @@ +set(SOURCE_FILE use_library.cpp) +set(HEADER_FILE lib.hpp) +set(TARGET_NAME use_library) +set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu) +set(FPGA_TARGET ${TARGET_NAME}.fpga) +set(REPORT_TARGET ${TARGET_NAME}_report.a) + +# Intel supported FPGA Boards and their names +set(A10_PAC_BOARD_NAME "intel_a10gx_pac:pac_a10") +set(S10_PAC_BOARD_NAME "intel_s10sx_pac:pac_s10") + +# Assume target is the Intel(R) PAC with Intel Arria(R) 10 GX FPGA +SET(_FPGA_BOARD ${A10_PAC_BOARD_NAME}) + +# Check if target is the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA +IF (NOT DEFINED FPGA_BOARD) + MESSAGE(STATUS "\tFPGA_BOARD was not specified. 
Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. Please refer to the README for more information on how to run the design on the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${A10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${S10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Stratix(R) 10 SX FPGA.") + SET(_FPGA_BOARD ${S10_PAC_BOARD_NAME}) + +ELSE() + MESSAGE(STATUS "\tAn invalid board name was passed in using the FPGA_BOARD flag. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. Please refer to the README for the list of valid board names.") +ENDIF() + +set(HLS_SOURCE lib_hls.cpp) +set(HLS_SOURCE_OBJECT lib_hls.o) + +set(OCL_SOURCE lib_ocl.cl) +set(OCL_SOURCE_OBJECT lib_ocl.o) + +set(SYCL_SOURCE lib_sycl.cpp) +set(SYCL_SOURCE_OBJECT lib_sycl.o) + +set(RTL_C_MODEL lib_rtl_model.cpp) +set(RTL_SPEC lib_rtl_spec.xml) +set(RTL_V lib_rtl.v) +set(RTL_SOURCE_OBJECT lib_rtl.o) + +set(LIBRARY_ARCHIVE lib.a) + +set(LIBRARY_DEVICE_LINK_FLAGS "${LIBRARY_ARCHIVE}") +set(LIBRARY_HOST_LINK_FLAGS "${HLS_SOURCE_OBJECT} ${OCL_SOURCE_OBJECT} ${SYCL_SOURCE_OBJECT} ${RTL_SOURCE_OBJECT}") + +set(HARDWARE_COMPILE_FLAGS "-fintelfpga") +# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation +set(HARDWARE_LINK_FLAGS "-fintelfpga -Xshardware -Xsboard=${_FPGA_BOARD} ${LIBRARY_DEVICE_LINK_FLAGS} ${USER_HARDWARE_FLAGS}") + +set(EMULATOR_COMPILE_FLAGS "-fintelfpga -DFPGA_EMULATOR") +set(EMULATOR_LINK_FLAGS "-fintelfpga ${LIBRARY_DEVICE_LINK_FLAGS}") + +#create hls source object +add_custom_target( + create_hls_source_object + COMMAND fpga_crossgen ${HLS_SOURCE} --source hls --target 
sycl -o ${HLS_SOURCE_OBJECT} ${CMAKE_CXX_FLAGS} + ) + +#create ocl source object +add_custom_target( + create_ocl_source_object + COMMAND fpga_crossgen ${OCL_SOURCE} --source ocl --target sycl -o ${OCL_SOURCE_OBJECT} + ) + +#create sycl source object +add_custom_target( + create_sycl_source_object + COMMAND fpga_crossgen ${SYCL_SOURCE} --source sycl --target sycl -o ${SYCL_SOURCE_OBJECT} ${CMAKE_CXX_FLAGS} + ) + +#create rtl source object +add_custom_target( + create_rtl_source_object + COMMAND fpga_crossgen ${RTL_SPEC} --emulation_model ${RTL_C_MODEL} --target sycl -o ${RTL_SOURCE_OBJECT} + ) + +#create library achive +add_custom_target( + create_library_archive + COMMAND fpga_libtool ${HLS_SOURCE_OBJECT} ${OCL_SOURCE_OBJECT} ${SYCL_SOURCE_OBJECT} ${RTL_SOURCE_OBJECT} --target sycl --create ${LIBRARY_ARCHIVE} + DEPENDS create_hls_source_object create_ocl_source_object create_sycl_source_object create_rtl_source_object + ) + +# fpga emulator +set(SOURCE_OBJ_FILE_EMU ${SOURCE_FILE}.emu.o) +add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) +separate_arguments(EMULATOR_COMPILE_FLAGS_LIST UNIX_COMMAND "${EMULATOR_COMPILE_FLAGS}") +add_custom_command(OUTPUT ${SOURCE_OBJ_FILE_EMU} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${SOURCE_FILE} ${EMULATOR_COMPILE_FLAGS_LIST} -c -o ${SOURCE_OBJ_FILE_EMU} + DEPENDS ${SOURCE_FILE} ${HEADER_FILE}) +separate_arguments(EMULATOR_LINK_FLAGS_LIST UNIX_COMMAND "${EMULATOR_LINK_FLAGS}") +add_custom_command(OUTPUT ${EMULATOR_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${SOURCE_OBJ_FILE_EMU} ${EMULATOR_LINK_FLAGS_LIST} -o ${CMAKE_BINARY_DIR}/${EMULATOR_TARGET} + DEPENDS ${SOURCE_OBJ_FILE_EMU} create_library_archive) + +# fpga +set(SOURCE_OBJ_FILE_FPGA ${SOURCE_FILE}.fpga.o) +add_custom_target(fpga DEPENDS ${FPGA_TARGET}) +separate_arguments(HARDWARE_COMPILE_FLAGS_LIST UNIX_COMMAND "${HARDWARE_COMPILE_FLAGS}") +add_custom_command(OUTPUT ${SOURCE_OBJ_FILE_FPGA} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} 
${SOURCE_FILE} ${HARDWARE_COMPILE_FLAGS_LIST} -c -o ${SOURCE_OBJ_FILE_FPGA} + DEPENDS ${SOURCE_FILE} ${HEADER_FILE}) +separate_arguments(HARDWARE_LINK_FLAGS_LIST UNIX_COMMAND "${HARDWARE_LINK_FLAGS}") +add_custom_command(OUTPUT ${FPGA_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${SOURCE_OBJ_FILE_FPGA} ${HARDWARE_LINK_FLAGS_LIST} -o ${CMAKE_BINARY_DIR}/${FPGA_TARGET} + DEPENDS ${SOURCE_OBJ_FILE_FPGA} create_library_archive) + +# report +set(SOURCE_OBJ_FILE_REPORT ${SOURCE_FILE}.report.o) +add_custom_target(report DEPENDS ${REPORT_TARGET}) +add_custom_command(OUTPUT ${SOURCE_OBJ_FILE_REPORT} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${SOURCE_FILE} ${HARDWARE_COMPILE_FLAGS_LIST} -c -o ${SOURCE_OBJ_FILE_REPORT} + DEPENDS ${SOURCE_FILE} ${HEADER_FILE}) +add_custom_command(OUTPUT ${REPORT_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${SOURCE_OBJ_FILE_REPORT} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link -o ${CMAKE_BINARY_DIR}/${REPORT_TARGET} + DEPENDS ${SOURCE_OBJ_FILE_REPORT} create_library_archive) + +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} ${SOURCE_FILE} COPYONLY) +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${HEADER_FILE} ${HEADER_FILE} COPYONLY) +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${HLS_SOURCE} ${HLS_SOURCE} COPYONLY) +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${OCL_SOURCE} ${OCL_SOURCE} COPYONLY) +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${SYCL_SOURCE} ${SYCL_SOURCE} COPYONLY) +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${RTL_SPEC} ${RTL_SPEC} COPYONLY) +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${RTL_C_MODEL} ${RTL_C_MODEL} COPYONLY) +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${RTL_V} ${RTL_V} COPYONLY) + +# run: launch the emulator binary; EMULATOR_TARGET is the output name the fpga_emu rule above produces +add_custom_target(run + COMMAND ../${EMULATOR_TARGET} + DEPENDS ${EMULATOR_TARGET}) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/lib.hpp b/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/lib.hpp new file mode 100755 index 0000000000..968b1139c4 
--- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/lib.hpp @@ -0,0 +1,9 @@ +//============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +SYCL_EXTERNAL float HlsSqrtf(float); +SYCL_EXTERNAL extern "C" float OclSquare(float); +SYCL_EXTERNAL float SyclSquare(float); +SYCL_EXTERNAL extern "C" unsigned RtlByteswap(unsigned x); diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/lib_hls.cpp b/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/lib_hls.cpp new file mode 100755 index 0000000000..7e488a1271 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/lib_hls.cpp @@ -0,0 +1,7 @@ +//============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +#include "HLS/math.h" +float HlsSqrtf(float x) { return sqrtf(x); } diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/lib_ocl.cl b/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/lib_ocl.cl new file mode 100755 index 0000000000..bf2a1c4930 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/lib_ocl.cl @@ -0,0 +1,6 @@ +//============================================================== +// Copyright © 2019 Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +float OclSquare(float x) { return x * x; } diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/lib_rtl.v b/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/lib_rtl.v new file mode 100755 index 0000000000..28c1ad0f96 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/lib_rtl.v @@ -0,0 +1,18 @@ +`timescale 1 ps / 1 ps + +module 
byteswap_uint ( + input clock, + input resetn, + input ivalid, + input iready, + output ovalid, + output oready, + input [31:0] datain, + output [31:0] dataout); + + assign ovalid = 1'b1; + assign oready = 1'b1; + // clk, ivalid, iready, resetn are ignored + assign dataout = {datain[15:0], datain[31:16]}; + +endmodule diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/lib_rtl_model.cpp b/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/lib_rtl_model.cpp new file mode 100755 index 0000000000..1c74a74b8b --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/lib_rtl_model.cpp @@ -0,0 +1,6 @@ +//============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +extern "C" unsigned RtlByteswap(unsigned x) { return x << 16 | x >> 16; } diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/lib_rtl_spec.xml b/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/lib_rtl_spec.xml new file mode 100755 index 0000000000..361ef11e8a --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/lib_rtl_spec.xml @@ -0,0 +1,25 @@ + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/lib_sycl.cpp b/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/lib_sycl.cpp new file mode 100755 index 0000000000..dcda51b31d --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/lib_sycl.cpp @@ -0,0 +1,7 @@ +//============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +#include +SYCL_EXTERNAL float SyclSquare(float x) { return x * x; } diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/use_library.cpp 
b/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/use_library.cpp new file mode 100755 index 0000000000..6af7f26437 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Tools/use_library/src/use_library.cpp @@ -0,0 +1,89 @@ +//============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +#include +#include +#include "dpc_common.hpp" +#include "lib.hpp" + +using namespace sycl; + +// Values used as input to the kernel +constexpr float kA = 2.0f; +constexpr float kB = 3.0f; + +// Forward declaration of the kernel name +// (This will become unnecessary in a future compiler version.) +class KernelCompute; + +int main() { + unsigned result = 0; + + // Select either the FPGA emulator (CPU) or FPGA device +#if defined(FPGA_EMULATOR) + intel::fpga_emulator_selector device_selector; +#else + intel::fpga_selector device_selector; +#endif + + try { + queue q(device_selector, dpc_common::exception_handler); + + // The scalar inputs are passed to the kernel using the lambda capture, + // but a SYCL buffer must be used to return a scalar from the kernel. + buffer buffer_c(&result, 1); + + q.submit([&](handler &h) { + + // Accessor to the scalar result + auto accessor_c = buffer_c.get_access(h); + + // Kernel + h.single_task([=]() { + + // OclSquare is an OpenCL function, defined in lib_ocl.cl. + float a_sq = OclSquare(kA); + + // HlsSqrtf is an Intel HLS component, defined in lib_hls.cpp. + // (Intel HLS is a C++ based High Level Synthesis language for FPGA.) + float a_sq_sqrt = HlsSqrtf(a_sq); + + // SyclSquare is a SYCL library function, defined in lib_sycl.cpp. + float b_sq = SyclSquare(kB); + + // RtlByteswap is an RTL library. + // - When compiled for FPGA, Verilog module byteswap_uint in lib_rtl.v + // is instantiated in the datapath by the compiler. 
+ // - When compiled for FPGA emulator (CPU), the C model of RtlByteSwap + // in lib_rtl_model.cpp is used instead. + accessor_c[0] = RtlByteswap((unsigned)(a_sq_sqrt + b_sq)); + }); + }); + } catch (sycl::exception const &e) { + // Catches exceptions in the host code + std::cout << "Caught a SYCL host exception:\n" << e.what() << "\n"; + + // Most likely the runtime couldn't find FPGA hardware! + if (e.get_cl_code() == CL_DEVICE_NOT_FOUND) { + std::cout << "If you are targeting an FPGA, please ensure that your " + "system has a correctly configured FPGA board.\n"; + std::cout << "If you are targeting the FPGA emulator, compile with " + "-DFPGA_EMULATOR.\n"; + } + std::terminate(); + } + + // Compute the expected "golden" result + unsigned gold = sqrt(kA * kA) + (kB * kB); + gold = gold << 16 | gold >> 16; + + // Check the results + if (result != gold) { + std::cout << "FAILED: result is incorrect!\n"; + return -1; + } + std::cout << "PASSED: result is correct!\n"; + return 0; +} diff --git a/DirectProgramming/FPGA/.gitkeep b/DirectProgramming/FPGA/.gitkeep deleted file mode 100644 index e69de29bb2..0000000000 From b6d86b6cebd1b05b2363f20bcf7756bb68efc92c Mon Sep 17 00:00:00 2001 From: Dmitriy Sobolev <69916350+dsobolev-dev@users.noreply.github.com> Date: Thu, 20 Aug 2020 17:26:30 +0300 Subject: [PATCH 08/17] oneDPL samples README/samples.json - Updated DPC++/C++ Compiler labels (#95) * oneDPL samples README/samples.json - Updated DPC++/C++ Compiler labels * Remove spare spaces --- Libraries/oneDPL/gamma-correction/README.md | 2 +- Libraries/oneDPL/gamma-correction/sample.json | 2 +- Libraries/oneDPL/stable_sort_by_key/README.md | 2 +- Libraries/oneDPL/stable_sort_by_key/sample.json | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Libraries/oneDPL/gamma-correction/README.md b/Libraries/oneDPL/gamma-correction/README.md index 718412cc40..143148d47b 100644 --- a/Libraries/oneDPL/gamma-correction/README.md +++ 
b/Libraries/oneDPL/gamma-correction/README.md @@ -5,7 +5,7 @@ Gamma correction is a nonlinear operation used to encode and decode the luminanc |---------------------------------|----------------------------------------------------------------------------------| | OS | Linux* Ubuntu* 18.04, Windows 10 | | Hardware | Skylake with GEN9 or newer | -| Software | Intel® oneAPI DPC++ Compiler beta; Intel® oneAPI DPC++ Library (oneDPL) | +| Software | Intel® oneAPI DPC++/C++ Compiler; Intel® oneAPI DPC++ Library (oneDPL) | | What you will learn | How to offload the computation to GPU using Intel® oneAPI DPC++ Library | | Time to complete | At most 5 minutes | diff --git a/Libraries/oneDPL/gamma-correction/sample.json b/Libraries/oneDPL/gamma-correction/sample.json index e2d46465e3..d99ff901c7 100644 --- a/Libraries/oneDPL/gamma-correction/sample.json +++ b/Libraries/oneDPL/gamma-correction/sample.json @@ -1,6 +1,6 @@ { "name": "Gamma Correction", - "categories": ["Toolkit/Intel® oneAPI Base Toolkit/oneAPI DPC++ Compiler/oneAPI DPC++ Library/CPU and GPU"], + "categories": ["Toolkit/Intel® oneAPI Base Toolkit/Intel® oneAPI DPC++/C++ Compiler/oneAPI DPC++ Library/CPU and GPU"], "description": "gamma correction - a nonlinear operation used to encode and decode the luminance of each image pixel.", "toolchain": ["dpcpp"], "languages": [{"cpp":{}}], diff --git a/Libraries/oneDPL/stable_sort_by_key/README.md b/Libraries/oneDPL/stable_sort_by_key/README.md index d6c12c5c84..4ed1222c3b 100644 --- a/Libraries/oneDPL/stable_sort_by_key/README.md +++ b/Libraries/oneDPL/stable_sort_by_key/README.md @@ -7,7 +7,7 @@ Stable sort by key is a sorting operation when sorting of 2 sequences (keys and |---------------------------------|----------------------------------------------------------------------------------| | OS | Linux* Ubuntu* 18.04 | | Hardware | Skylake with GEN9 or newer | -| Software | Intel® oneAPI DPC++ Compiler beta; Intel® oneAPI DPC++ Library (oneDPL) | +| Software | Intel® 
oneAPI DPC++/C++ Compiler; Intel® oneAPI DPC++ Library (oneDPL) | | What you will learn | How to use `counting_iterator` and `zip_iterator` | | Time to complete | At most 5 minutes | diff --git a/Libraries/oneDPL/stable_sort_by_key/sample.json b/Libraries/oneDPL/stable_sort_by_key/sample.json index 66e315d599..a4c803bb7d 100644 --- a/Libraries/oneDPL/stable_sort_by_key/sample.json +++ b/Libraries/oneDPL/stable_sort_by_key/sample.json @@ -1,6 +1,6 @@ { "name": "Stable sort by key", - "categories": ["Toolkit/Intel® oneAPI Base Toolkit/oneAPI DPC++ Compiler/oneAPI DPC++ Library/CPU and GPU"], + "categories": ["Toolkit/Intel® oneAPI Base Toolkit/Intel® oneAPI DPC++/C++ Compiler/oneAPI DPC++ Library/CPU and GPU"], "description": "It models stable sort by key: during the sorting of 2 sequences (keys and values) only keys are compared but both keys and values are swapped", "toolchain": ["dpcpp"], "languages": [{"cpp":{}}], From d30b7ee19260de5d14bff8c236317cb4da3d29d1 Mon Sep 17 00:00:00 2001 From: Tankut Baris Aktemur <55686642+barisaktemur@users.noreply.github.com> Date: Thu, 20 Aug 2020 16:27:35 +0200 Subject: [PATCH 09/17] array-transform: update the devcloud note (#96) Signed-off-by: Tankut Baris Aktemur --- Tools/ApplicationDebugger/array-transform/README.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Tools/ApplicationDebugger/array-transform/README.md b/Tools/ApplicationDebugger/array-transform/README.md index 255cfe8957..89023a1429 100644 --- a/Tools/ApplicationDebugger/array-transform/README.md +++ b/Tools/ApplicationDebugger/array-transform/README.md @@ -93,7 +93,14 @@ system. If running a sample in the Intel DevCloud, remember that you must specify the compute node (CPU, GPU, FPGA) as well whether to run in -batch or interactive mode. For more information see the Intel® oneAPI +batch or interactive mode. For the array transform sample, a node +with GPU and an interactive shell is recommended. 
+ +``` +$ qsub -I -l nodes=1:gpu:ppn=2 +``` + +For more information see the Intel® oneAPI Base Toolkit Get Started Guide (https://devcloud.intel.com/oneapi/get-started/base-toolkit/). From 6bb3678090bf25301edf2b6aad889b2565dbb1f5 Mon Sep 17 00:00:00 2001 From: lqnguyen Date: Thu, 20 Aug 2020 07:35:15 -0700 Subject: [PATCH 10/17] Add a new code sample "PrefixSum". (#89) * Add bitonic-sort sample. * Add a note about common file in README. Signed-off-by: Loc Nguyen * Move 1d_HeatTransfer sample to open source GitHub. Signed-off-by: Loc Nguyen * Updating License file to remove date * Adding Buffer Object approach. * Add comment about the location of dpc_common.hpp. * New sample: Prefix Sum. * Remove new sample. * New code sample PrefixSum in ParallelPatterns. Signed-off-by: Loc Nguyen --- .../ParallelPatterns/PrefixSum/CMakeLists.txt | 30 +++ .../ParallelPatterns/PrefixSum/License.txt | 7 + .../ParallelPatterns/PrefixSum/PrefixSum.sln | 25 ++ .../PrefixSum/PrefixSum.vcxproj | 137 ++++++++++ .../PrefixSum/PrefixSum.vcxproj.filters | 22 ++ .../PrefixSum/PrefixSum.vcxproj.user | 11 + .../ParallelPatterns/PrefixSum/README.md | 124 +++++++++ .../ParallelPatterns/PrefixSum/sample.json | 29 +++ .../PrefixSum/src/PrefixSum.cpp | 239 ++++++++++++++++++ 9 files changed, 624 insertions(+) create mode 100644 DirectProgramming/DPC++/ParallelPatterns/PrefixSum/CMakeLists.txt create mode 100644 DirectProgramming/DPC++/ParallelPatterns/PrefixSum/License.txt create mode 100644 DirectProgramming/DPC++/ParallelPatterns/PrefixSum/PrefixSum.sln create mode 100644 DirectProgramming/DPC++/ParallelPatterns/PrefixSum/PrefixSum.vcxproj create mode 100644 DirectProgramming/DPC++/ParallelPatterns/PrefixSum/PrefixSum.vcxproj.filters create mode 100644 DirectProgramming/DPC++/ParallelPatterns/PrefixSum/PrefixSum.vcxproj.user create mode 100644 DirectProgramming/DPC++/ParallelPatterns/PrefixSum/README.md create mode 100644 DirectProgramming/DPC++/ParallelPatterns/PrefixSum/sample.json create mode 
100644 DirectProgramming/DPC++/ParallelPatterns/PrefixSum/src/PrefixSum.cpp diff --git a/DirectProgramming/DPC++/ParallelPatterns/PrefixSum/CMakeLists.txt b/DirectProgramming/DPC++/ParallelPatterns/PrefixSum/CMakeLists.txt new file mode 100644 index 0000000000..85fcec4963 --- /dev/null +++ b/DirectProgramming/DPC++/ParallelPatterns/PrefixSum/CMakeLists.txt @@ -0,0 +1,30 @@ +# required cmake version +cmake_minimum_required(VERSION 3.5) + +project (PrefixSum) + +if(WIN32) + set(CMAKE_CXX_COMPILER "dpcpp") +else() + set(CMAKE_CXX_COMPILER "dpcpp") +endif() + +# Set default build type to RelWithDebInfo if not specified +if (NOT CMAKE_BUILD_TYPE) + message (STATUS "Default CMAKE_BUILD_TYPE not set using Release with Debug Info") + set (CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE + STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel" + FORCE) +endif() + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -fsycl -std=c++17") + +set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lOpenCL -lsycl") + +add_executable (PrefixSum src/PrefixSum.cpp) + +add_custom_target (run + COMMAND PrefixSum 21 47 + WORKING_DIRECTORY ${CMAKE_PROJECT_DIR} +) + diff --git a/DirectProgramming/DPC++/ParallelPatterns/PrefixSum/License.txt b/DirectProgramming/DPC++/ParallelPatterns/PrefixSum/License.txt new file mode 100644 index 0000000000..415025cf03 --- /dev/null +++ b/DirectProgramming/DPC++/ParallelPatterns/PrefixSum/License.txt @@ -0,0 +1,7 @@ +Copyright Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall 
be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/DirectProgramming/DPC++/ParallelPatterns/PrefixSum/PrefixSum.sln b/DirectProgramming/DPC++/ParallelPatterns/PrefixSum/PrefixSum.sln new file mode 100644 index 0000000000..3587a92e74 --- /dev/null +++ b/DirectProgramming/DPC++/ParallelPatterns/PrefixSum/PrefixSum.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 16 +VisualStudioVersion = 16.0.29926.136 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PrefixSum", "PrefixSum.vcxproj", "{BC12ABE6-7951-47D6-93DC-126F8A5FCFD2}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {BC12ABE6-7951-47D6-93DC-126F8A5FCFD2}.Debug|x64.ActiveCfg = Debug|x64 + {BC12ABE6-7951-47D6-93DC-126F8A5FCFD2}.Debug|x64.Build.0 = Debug|x64 + {BC12ABE6-7951-47D6-93DC-126F8A5FCFD2}.Release|x64.ActiveCfg = Release|x64 + {BC12ABE6-7951-47D6-93DC-126F8A5FCFD2}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {9B9594EB-112B-4FAE-AD1F-04BD8FF34B9F} + EndGlobalSection +EndGlobal diff --git a/DirectProgramming/DPC++/ParallelPatterns/PrefixSum/PrefixSum.vcxproj 
b/DirectProgramming/DPC++/ParallelPatterns/PrefixSum/PrefixSum.vcxproj new file mode 100644 index 0000000000..6a6309b96b --- /dev/null +++ b/DirectProgramming/DPC++/ParallelPatterns/PrefixSum/PrefixSum.vcxproj @@ -0,0 +1,137 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + + + 15.0 + {bc12abe6-7951-47d6-93dc-126f8a5fcfd2} + Win32Proj + PrefixSum + 10.0.17763.0 + + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + + + + + + + + + + + + + + + + + + + true + + + true + + + false + + + false + + + + + + + + + + Console + true + + + + + + + + + %ONEAPI_ROOT%\dev-utilities\latest\include + + + Console + true + + + + + + + + + + + Console + true + true + true + + + + + + + + + %ONEAPI_ROOT%\dev-utilities\latest\include;%(AdditionalIncludeDirectories) + + + Console + true + true + true + + + + + + \ No newline at end of file diff --git a/DirectProgramming/DPC++/ParallelPatterns/PrefixSum/PrefixSum.vcxproj.filters b/DirectProgramming/DPC++/ParallelPatterns/PrefixSum/PrefixSum.vcxproj.filters new file mode 100644 index 0000000000..2003dce0f2 --- /dev/null +++ b/DirectProgramming/DPC++/ParallelPatterns/PrefixSum/PrefixSum.vcxproj.filters @@ -0,0 +1,22 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;ipp;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + \ No newline at end of file diff --git a/DirectProgramming/DPC++/ParallelPatterns/PrefixSum/PrefixSum.vcxproj.user b/DirectProgramming/DPC++/ParallelPatterns/PrefixSum/PrefixSum.vcxproj.user new file mode 100644 index 0000000000..7288fa06dd --- /dev/null 
+++ b/DirectProgramming/DPC++/ParallelPatterns/PrefixSum/PrefixSum.vcxproj.user @@ -0,0 +1,11 @@ + + + + 21 47 + WindowsLocalDebugger + + + 21 47 + WindowsLocalDebugger + + \ No newline at end of file diff --git a/DirectProgramming/DPC++/ParallelPatterns/PrefixSum/README.md b/DirectProgramming/DPC++/ParallelPatterns/PrefixSum/README.md new file mode 100644 index 0000000000..6bbc2cfdfb --- /dev/null +++ b/DirectProgramming/DPC++/ParallelPatterns/PrefixSum/README.md @@ -0,0 +1,124 @@ +# `Prefix Sum` sample + +This code sample demonstrates the implementation of parallel prefix sum using Intel Data Parallel C++ to +offload the computation to a GPU. In this implementation, a random sequence of 2**n elements is given +(n is a positive number) as input, and the algorithm computes the prefix sum in parallel. The result sequence is +in ascending order. + +For comprehensive instructions regarding DPC++ Programming, go to +https://software.intel.com/en-us/oneapi-programming-guide +and search based on relevant terms noted in the comments. + +| Optimized for | Description +|:--- |:--- +| OS | Linux Ubuntu 18.04 +| Hardware | Skylake with GEN9 or newer +| Software | Intel® oneAPI DPC++ Compiler (beta); Intel C++ Compiler (beta) +| What you will learn | Implement parallel prefix sum using Intel DPC++ compiler +| Time to complete | 15 minutes + + +## Purpose + +Given a randomized sequence of numbers x0, x1, x2, ..., xn, this algorithm computes and returns +a new sequence y0, y1, y2, ..., yn so that + +y0 = x0 +y1 = x0 + x1 +y2 = x0 + x1 + x2 +..... +yn = x0 + x1 + x2 + ... + xn + +Below is the pseudo code for computing prefix sum in parallel: + +n is power of 2 (1, 2, 4 , 8, 16, ...): + +for i from 0 to [log2 n] - 1 do + for j from 0 to (n-1) do in parallel + if j<2^i then + x_{j}^{i+1} <- x_{j}^{i} + else + x_{j}^{i+1} <- x_{j}^{i} + x_{j-2^{i}}^{i} + +In the above, the notation x_{j}^{i} means the value of the jth element of array x in timestep i. 
+Given n processors to perform each iteration of the inner loop in constant time, the algorithm +as a whole runs in O(log n) time, the number of iterations of the outer loop. + +The code will attempt first to execute on an available GPU and fall back to the system's CPU if a +compatible GPU is not detected. + +## Key Implementation Details + +The basic DPC++ implementation explained in the code includes device selector, buffer, accessor, kernel, and command +groups. + +## License +This code sample is licensed under MIT license + +## Building the `PrefixSum` Program for CPU and GPU + +### Include Files +The include folder is located at `%ONEAPI_ROOT%\dev-utilities\latest\include` on your development system. + +### Running Samples In DevCloud +If running a sample in the Intel DevCloud, remember that you must specify the compute node (CPU, GPU, +FPGA) as well as whether to run in batch or interactive mode. For more information see the Intel® oneAPI +Base Toolkit Get Started Guide (https://devcloud.intel.com/oneapi/get-started/base-toolkit/) + +### On a Linux* System +1. Build the program using the following `cmake` commands. + ``` + $ cd PrefixSum + $ mkdir build + $ cd build + $ cmake .. + $ make + ``` + +2. Run the program: + ``` + make run + ``` + +3. Clean the program using: + ``` + make clean + ``` + +### On a Windows* System + * Build the program using VS2017 or VS2019 + Right click on the solution file and open using either VS2017 or VS2019 IDE. + Right click on the project in Solution explorer and select Rebuild. + From top menu select Debug -> Start without Debugging. + + * Build the program using MSBuild + Open "x64 Native Tools Command Prompt for VS2017" or "x64 Native Tools Command Prompt for + VS2019" + Run - MSBuild PrefixSum.sln /t:Rebuild /p:Configuration="Release" + +## Running the sample +### Application Parameters + + Usage: PrefixSum <exponent> <seed> + +where + +exponent is a positive number. The corresponding length of the sequence is 2**exponent. 
+ +seed is the seed used by the random generator to generate the randomness. + +The sample offloads the computation to GPU and then verifies the results on the CPU. +The results are verified if yk = yk-1 + xk holds for every element xk of the original sequence. If the results are matched and +the ascending order is verified, the application will display a “Success!” message. + +### Example of Output +``` +$ ./PrefixSum 21 47 + +Sequence size: 2097152, seed: 47 +Num iteration: 21 +Device: Intel(R) Gen9 HD Graphics NEO +Kernel time: 170 ms + +Success! +``` diff --git a/DirectProgramming/DPC++/ParallelPatterns/PrefixSum/sample.json b/DirectProgramming/DPC++/ParallelPatterns/PrefixSum/sample.json new file mode 100644 index 0000000000..def268a2f8 --- /dev/null +++ b/DirectProgramming/DPC++/ParallelPatterns/PrefixSum/sample.json @@ -0,0 +1,29 @@ +{ + "guid": "5D274319-02EE-44B0-B055-71E4C50D05E0", + "name": "PrefixSum", + "categories": [ "Toolkit/Intel® oneAPI Base Toolkit/oneAPI DPC++ Compiler/CPU and GPU" ], + "description": "Compute Prefix Sum using Intel® oneAPI DPC++ Language", + "toolchain": [ "dpcpp" ], + "targetDevice": [ "CPU", "GPU" ], + "languages": [ { "cpp": {} } ], + "os": [ "linux", "windows" ], + "builder": [ "ide", "cmake" ], + "ciTests": { + "linux": [{ + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make", + "make run" + ] + }], + "windows": [{ + "steps": [ + "MSBuild PrefixSum.sln /t:Rebuild /p:Configuration=\"Release\"", + "cd x64/Release", + "PrefixSum.exe 21 47" + ] + }] + } +} diff --git a/DirectProgramming/DPC++/ParallelPatterns/PrefixSum/src/PrefixSum.cpp b/DirectProgramming/DPC++/ParallelPatterns/PrefixSum/src/PrefixSum.cpp new file mode 100644 index 0000000000..b2af8367a7 --- /dev/null +++ b/DirectProgramming/DPC++/ParallelPatterns/PrefixSum/src/PrefixSum.cpp @@ -0,0 +1,239 @@ +//============================================================== +// Copyright © 2020 Intel Corporation +// +// SPDX-License-Identifier: MIT +// 
============================================================= +// +// PrefixSum: this code sample implements the inclusive scan (prefix sum) in parallel. That +// is, given a randomized sequence of numbers x0, x1, x2, ..., xn, this algorithm computes and +// returns a new sequence y0, y1, y2, ..., yn so that +// +// y0 = x0 +// y1 = x0 + x1 +// y2 = x0 + x1 + x2 +// ..... +// yn = x0 + x1 + x2 + ... + xn +// +// Below is the pseudo code for computing prefix sum in parallel: +// +// n is power of 2 (1, 2, 4 , 8, 16, ...): +// +// for i from 0 to [log2 n] - 1 do +// for j from 0 to (n-1) do in parallel +// if j<2^i then +// x_{j}^{i+1} <- x_{j}^{i}} +// else +// x_{j}^{i+1} <- x_{j}^{i} + x_{j-2^{i}}^{i}} +// +// In the above, the notation x_{j}^{i} means the value of the jth element of array x in timestep i. +// Given n processors to perform each iteration of the inner loop in constant time, the algorithm as +// a whole runs in O(log n) time, the number of iterations of the outer loop. +// + +#include + +// dpc_common.hpp can be found in the dev-utilities include folder. +// e.g., $ONEAPI_ROOT/dev-utilities//include/dpc_common.hpp +#include "dpc_common.hpp" + +using namespace sycl; +using namespace std; + +void Show(int a[], int arraysize) +{ + for (int i = 0; i < arraysize; ++i) + { + std::cout << a[i] << " "; + if ((i % 16) == 15) std::cout << "\n"; + } + + std::cout << "\n"; + return; +} + +int* ParallelPrefixSum(int* prefix1, int* prefix2, unsigned int nb, queue &q) +{ + unsigned int two_power = 1; + unsigned int num_iter = log2(nb); + //unsigned int uintmax = UINT_MAX; + int* result = NULL; + + // std::cout << "uintmax " << uintmax << " " << log2(uintmax) << "\n"; + // Buffer scope + { + buffer prefix1_buf(prefix1, range<1>{nb}); + buffer prefix2_buf(prefix2, range<1>{nb}); + + // Iterate over the necessary iterations. 
+ for (unsigned int iter = 0; iter < num_iter; iter++, two_power*=2) { + + // Submit command group for execution + q.submit([&](handler& h) { + // Create accessors + auto prefix1_acc = prefix1_buf.get_access(h); + auto prefix2_acc = prefix2_buf.get_access(h); + + if (iter % 2 == 0) { + h.parallel_for(range<1>(nb), [=](id<1> j) { + if (j < two_power) { + prefix2_acc[j] = prefix1_acc[j]; + } + else { + prefix2_acc[j] = prefix1_acc[j] + prefix1_acc[j - two_power]; + } + }); // end parallel for loop in kernel + result = prefix2; + //std::cout << "return prefix2\n"; + } + else { + h.parallel_for(range<1>(nb), [=](id<1> j) { + if (j < two_power) { + prefix1_acc[j] = prefix2_acc[j]; + } + else { + prefix1_acc[j] = prefix2_acc[j] + prefix2_acc[j - two_power]; + } + }); // end parallel for loop in kernel + result = prefix1; + //std::cout << "return prefix1\n"; + } + }); // end device queue + } // end iteration + } // Buffer scope + + // Wait for commands to complete. Enforce synchronization on the command queue + q.wait_and_throw(); + + return result; +} +/* +void PrefixSum(int* x, unsigned int nb) +{ + unsigned int two_power = 1; + unsigned int num_iter = log2(nb); + int temp = 0; + + // Iterate over the necessary iterations + for (unsigned int iter = 0; iter < num_iter; iter++, two_power*=2) { + //Show(x, nb); + // std::cout << "two_power: " << two_power << "\n"; + for (unsigned int j = nb; j > 0; j--) { + if (j < two_power) { + x[j] = x[j]; + } + else { + x[j] = x[j] + x[j - two_power]; + } + } + } +} +*/ +void Usage(std::string prog_name, int exponent) { + std::cout << " Incorrect parameters\n"; + std::cout << " Usage: " << prog_name << " n k \n\n"; + std::cout << " n: Integer exponent presenting the size of the input array. The number of el\ +ement in\n"; + std::cout << " the array must be power of 2 (e.g., 1, 2, 4, ...). 
Please enter the corre\ +sponding\n"; + std::cout << " exponent betwwen 0 and " << exponent - 1 << ".\n"; + std::cout << " k: Seed used to generate a random sequence.\n"; +} + +int main(int argc, char* argv[]) { + unsigned int nb, seed; + int n, exp_max = log2(std::numeric_limits::max()); + + // Read parameters. + try { + n = std::stoi(argv[1]); + + // Verify the boundary of acceptance. + if (n < 0 || n >= exp_max) { + Usage(argv[0], exp_max); + return -1; + } + + seed = std::stoi(argv[2]); + nb = pow(2, n); + } catch (...) { + Usage(argv[0], exp_max); + return -1; + } + + std::cout << "\nSequence size: " << nb << ", seed: " << seed; + + int num_iter = log2(nb); + std::cout << "\nNum iteration: " << num_iter << "\n"; + + // Define device selector as 'default' + default_selector device_selector; + + // exception handler + auto exception_handler = [](exception_list exceptionList) { + for (std::exception_ptr const& e : exceptionList) { + try { + std::rethrow_exception(e); + } catch (cl::sycl::exception const& e) { + std::terminate(); + } + } + }; + + // Create a device queue using DPC++ class queue + queue q(device_selector, exception_handler); + + std::cout << "Device: " << q.get_device().get_info() << "\n"; + + int *data = new int[nb]; + int *prefix_sum1 = new int[nb]; + int *prefix_sum2 = new int[nb]; + int *result = NULL; + + srand(seed); + + // Initialize data arrays + for (int i = 0; i < nb; i++) { + data[i] = prefix_sum1[i] = rand() % 10; + prefix_sum2[i] = 0; + } + + // Start timer + auto start = std::chrono::steady_clock::now(); + + result = ParallelPrefixSum(prefix_sum1, prefix_sum2, nb, q); + + auto end = std::chrono::steady_clock::now(); + auto timeKern = std::chrono::duration_cast(end - start).count(); + std::cout << "Kernel time: " << timeKern << " ms" << "\n"; + + //std::cout << "\ndata after transforming using parallel prefix sum result:"; + //Show(result, nb); + + bool equal = true; + + if (result[0] != data[0]) + equal = false; + else + { + for (int 
i = 1; i < nb; i++) { + if (result[i] != result[i - 1] + data[i]) + { + equal = false; + break; + } + } + } + + delete[] data; + delete[] prefix_sum1; + delete[] prefix_sum2; + + if (!equal) { + std::cout << "\nFailed: " << std::endl; + return -2; + } + else { + std::cout << "\nSuccess!" << std::endl; + return 0; + } +} From fc69f8e815e0ab0ece196781bc650c00476c5d1f Mon Sep 17 00:00:00 2001 From: akertesz <67655634+akertesz@users.noreply.github.com> Date: Fri, 21 Aug 2020 00:02:03 -0400 Subject: [PATCH 11/17] Add fpga_reg and loop_unroll tutorials, with Linux support only (#99) Signed-off-by: Audrey Kertesz --- .../Features/fpga_reg/CMakeLists.txt | 11 + .../Tutorials/Features/fpga_reg/License.txt | 7 + .../Tutorials/Features/fpga_reg/README.md | 188 +++++++++++++++ .../Tutorials/Features/fpga_reg/fpga_reg.png | Bin 0 -> 94859 bytes .../Features/fpga_reg/no_fpga_reg.png | Bin 0 -> 84191 bytes .../Tutorials/Features/fpga_reg/sample.json | 34 +++ .../Features/fpga_reg/src/CMakeLists.txt | 111 +++++++++ .../Features/fpga_reg/src/fpga_reg.cpp | 216 ++++++++++++++++++ .../Features/loop_unroll/CMakeLists.txt | 11 + .../Features/loop_unroll/License.txt | 7 + .../Tutorials/Features/loop_unroll/README.md | 188 +++++++++++++++ .../Features/loop_unroll/sample.json | 34 +++ .../Features/loop_unroll/src/CMakeLists.txt | 89 ++++++++ .../Features/loop_unroll/src/loop_unroll.cpp | 138 +++++++++++ 14 files changed, 1034 insertions(+) create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/fpga_reg/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/fpga_reg/License.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/fpga_reg/README.md create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/fpga_reg/fpga_reg.png create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/fpga_reg/no_fpga_reg.png create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/fpga_reg/sample.json create mode 100755 
DirectProgramming/DPC++FPGA/Tutorials/Features/fpga_reg/src/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/fpga_reg/src/fpga_reg.cpp create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/loop_unroll/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/loop_unroll/License.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/loop_unroll/README.md create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/loop_unroll/sample.json create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/loop_unroll/src/CMakeLists.txt create mode 100755 DirectProgramming/DPC++FPGA/Tutorials/Features/loop_unroll/src/loop_unroll.cpp diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/fpga_reg/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/fpga_reg/CMakeLists.txt new file mode 100755 index 0000000000..325cc3fa42 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/fpga_reg/CMakeLists.txt @@ -0,0 +1,11 @@ +set(CMAKE_CXX_COMPILER "dpcpp") + +cmake_minimum_required (VERSION 2.8) + +project(FPGARegister) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +add_subdirectory (src) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/fpga_reg/License.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/fpga_reg/License.txt new file mode 100755 index 0000000000..e63c6e13dc --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/fpga_reg/License.txt @@ -0,0 +1,7 @@ +Copyright Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the 
Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/fpga_reg/README.md b/DirectProgramming/DPC++FPGA/Tutorials/Features/fpga_reg/README.md new file mode 100755 index 0000000000..18e2a1f244 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/fpga_reg/README.md @@ -0,0 +1,188 @@ +# Explicit Pipeline Register Insertion with `fpga_reg` + +This FPGA tutorial demonstrates how a power user can apply the DPC++ extension `intel::fpga_reg` to tweak the hardware generated by the compiler. + +***Documentation***: The [oneAPI DPC++ FPGA Optimization Guide](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide) provides comprehensive instructions for targeting FPGAs through DPC++. The [oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide) is a general resource for target-independent DPC++ programming. + +| Optimized for | Description +--- |--- +| OS | Linux* Ubuntu* 18.04 +| Hardware | Intel® Programmable Acceleration Card (PAC) with Intel Arria® 10 GX FPGA;
Intel® Programmable Acceleration Card (PAC) with Intel Stratix® 10 SX FPGA +| Software | Intel® oneAPI DPC++ Compiler (Beta)
Intel® FPGA Add-On for oneAPI Base Toolkit +| What you will learn | How to use the `intel::fpga_reg` extension
How `intel::fpga_reg` can be used to re-structure the compiler-generated hardware
Situations in which applying `intel::fpga_reg` might be beneficial +| Time to complete | 20 minutes + +_Notice: This code sample is not yet supported in Windows*_ + +## Purpose + +This FPGA tutorial demonstrates an example of using the `intel::fpga_reg` extension to: + +* Help reduce the fanout of specific signals in the DPC++ design +* Improve the overall fMAX of the generated hardware + +Note that this is an advanced tutorial for FPGA power users. + +### Simple Code Example + +The signature of `intel::fpga_reg` is as follows: + +```cpp +template <class T> +T intel::fpga_reg(T input) +``` + +To use this function in your code, you must include the following header: + +```cpp +#include <CL/sycl/intel/fpga_extensions.hpp> +``` + +When you use this function on any value in your code, the compiler will insert at least one register stage between the input and output of `intel::fpga_reg` function. For example: + +```cpp +int func (int input) { + int output = intel::fpga_reg(input); + return output; +} +``` + +This forces the compiler to insert a register between the input and output. You can observe this in the optimization report's System Viewer. + +### Understanding the Tutorial Design + +The basic function performed by the tutorial kernel is a vector dot product with a pre-adder. The loop is unrolled so that the core part of the algorithm is a feed-forward datapath. The coefficient array is implemented as a circular shift register and rotates by one for each iteration of the outer loop. + +The optimization applied in this tutorial impacts the system fMAX or the maximum frequency that the design can run at. Since the compiler implements all kernels in a common clock domain, fMAX is a global system parameter. To see the impact of the `intel::fpga_reg` optimization in this tutorial, you will need to compile the design twice. + +Part 1 compiles the kernel code without setting the `USE_FPGA_REG` macro, whereas Part 2 compiles the kernel while setting this macro. 
This chooses between two code segments that are functionally equivalent, but the latter version makes use of `intel::fpga_reg`. In the `USE_FPGA_REG` version of the code, the compiler is guaranteed to insert at least one register stage between the input and output of each of the calls to `intel::fpga_reg` function. + +#### Part 1: Without `USE_FPGA_REG` + +The compiler will generate the following hardware for Part 1. The diagram below has been simplified for illustration. + +Part 1 + +Note the following: + +* The compiler automatically infers a tree structure for the series of adders. +* There is a large fanout (of up to 4 in this simplified example) from `val` to each of the adders. + +The fanout grows linearly with the unroll factor in this tutorial. In FPGA designs, signals with large fanout can sometimes degrade system fMAX. This happens because the FPGA placement algorithm cannot place *all* of the fanout logic elements physically close to the fanout source, leading to longer wires. In this situation, it can be helpful to add explicit fanout control in your DPC++ code via `intel::fpga_reg`. This is an advanced optimization for FPGA power-users. + +#### Part 2: with `USE_FPGA_REG` + +In this part, we added two sets of `intel::fpga_reg` within the unrolled loop. The first is added to pipeline `val` once per iteration. This reduces the fanout of `val` from 4 in the example in Part 1 to just 2. The second `intel::fpga_reg` is inserted between accumulation into the `acc` value. This generates the following structure in hardware. + +Part 2 + +In this version, the adder tree has been transformed into a vine-like structure. This increases latency, but it helps us achieve our goal of reducing the fanout and improving fMAX. +Since the outer loop in this tutorial is pipelined and has a high trip count, the increased latency of the inner loop has negligible impact on throughput. The tradeoff pays off, as the fMAX improvement yields a higher performing design. 
+ +## Key Concepts + +* How to use the `intel::fpga_reg` extension +* How `intel::fpga_reg` can be used to re-structure the compiler-generated hardware +* Situations in which applying `intel::fpga_reg` might be beneficial + +## License + +This code sample is licensed under MIT license. + +## Building the `fpga_reg` Design + +### Include Files + +The included header `dpc_common.hpp` is located at `%ONEAPI_ROOT%\dev-utilities\latest\include` on your development system. + +### Running Samples in DevCloud + +If running a sample in the Intel DevCloud, remember that you must specify the compute node (fpga_compile or fpga_runtime) as well as whether to run in batch or interactive mode. For more information see the Intel® oneAPI Base Toolkit Get Started Guide ([https://devcloud.intel.com/oneapi/get-started/base-toolkit/](https://devcloud.intel.com/oneapi/get-started/base-toolkit/)). + +When compiling for FPGA hardware, it is recommended to increase the job timeout to 12h. + +### On a Linux* System + +1. Install the design into a `build` directory from the design directory by running `cmake`: + + ```bash + mkdir build + cd build + ``` + + If you are compiling for the Intel® PAC with Intel Arria® 10 GX FPGA, run `cmake` using the command: + + ```bash + cmake .. + ``` + + Alternatively, to compile for the Intel® PAC with Intel Stratix® 10 SX FPGA, run `cmake` using the command: + + ```bash + cmake .. -DFPGA_BOARD=intel_s10sx_pac:pac_s10 + ``` + +2. Compile the design using the generated `Makefile`. The following three build targets are provided that match the recommended development flow: + + * Compile and run for emulation (fast compile time, targets an emulated FPGA device) using: + + ```bash + make fpga_emu + ``` + + * Generate HTML optimization reports using: + + ```bash + make report + ``` + + * Compile and run on FPGA hardware (longer compile time, targets an FPGA device) using: + + ```bash + make fpga + ``` + +3. 
(Optional) As the above hardware compile may take several hours to complete, an Intel® PAC with Intel Arria® 10 GX FPGA pre-compiled binary can be downloaded here. + + +### In Third-Party Integrated Development Environments (IDEs) + +You can compile and run this tutorial in the Eclipse* IDE (in Linux*). +For instructions, refer to the following link: [Intel® oneAPI DPC++ FPGA Workflows on Third-Party IDEs](https://software.intel.com/en-us/articles/intel-oneapi-dpcpp-fpga-workflow-on-ide) + +## Examining the Reports + +Locate the pair of `report.html` files in either: + +* **Report-only compile**: `fpga_reg_report.prj` and `fpga_reg_registered_report.prj` +* **FPGA hardware compile**: `fpga_reg.prj` and `fpga_reg_registered.prj` + +Open the reports in any of Chrome*, Firefox*, Edge*, or Internet Explorer*. Observe the structure of the design in the optimization report's System Viewer and notice the changes within `Cluster 2` of the `SimpleMath.B1` block. You can notice that in the report for Part 1, the viewer shows a much more shallow graph as compared to the one in Part 2. This is because the operations are performed much closer to one another in Part 1 as compared to Part 2. By transforming the code in Part 2, with more register stages, the compiler was able to achieve a higher fMAX. + +>**NOTE**: Only the report generated after the FPGA hardware compile will reflect the performance benefit of using the `fpga_reg` extension. The difference is *not* apparent in the reports generated by `make report` because a design's fMAX cannot be predicted. The final achieved fMAX can be found in `fpga_reg.prj/reports/report.html` and `fpga_reg_registered.prj/reports/report.html` (after `make fpga` completes). + +## Running the Sample + +1. Run the sample on the FPGA emulator (the kernel executes on the CPU): + + ```bash + ./fpga_reg.fpga_emu # Linux + ``` + +2. 
Run the sample on the FPGA device + + ```bash + ./fpga_reg.fpga # Linux + ./fpga_reg_registered.fpga # Linux + ``` + +### Example of Output + +```txt +Throughput for kernel with input size 1000000 and coefficient array size 64: 2.819272 GFlops +PASSED: Results are correct. +``` + +### Discussion of Results + +You will be able to observe the improvement in the throughput going from Part 1 to Part 2. You will also note that the fMAX of Part 2 is significantly larger than of Part 1. diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/fpga_reg/fpga_reg.png b/DirectProgramming/DPC++FPGA/Tutorials/Features/fpga_reg/fpga_reg.png new file mode 100755 index 0000000000000000000000000000000000000000..fe3391693963381bfc785093ac6d9117107e8e2b GIT binary patch literal 94859 zcmeFZcQoAH7dI+Z5F`Xij2=-EBtaN8h!#;o^p+^m+XzOFXd#T=%S5ywI?+i8L$v5M zgc(FPdY^lS=Xrk5@4oN-=dSzSb=SQ&Ygx&B^PPS6*?XT|KSz*?k}N6lO=1E90#bRo z$EpMb=avZwF1)&Q7CbS2+iD8_I^(D+D@jn$1(^pQ&YMXnN)QkfhmjnO2PH5q z!ZUca3C8n7y++q5Cyzx)n$?eN6}10)ejQf(ix^6ee<%4Obi&pjLXQ8pk}t%rLJ~oU z|97?oqC%JHME!627*S*S;b=U~uNF&D=k6=&ALcK zm01*WHG+t4*oI;uEL5y#XLZBUHEh3dZdcV{Os8RE`a6L{>nn2FN2l7;rg(->?jy4E z-aE1)4#VgY4(q$l9*=f3Nnyu&S(d&qdKezbcDv}f4kd5sY;cK9a;ffn9A#E8TdoRj z7o9m(QaGzaXV~g}r@X*G)W=Dk(lI}woWsES9|GbvNx7ywml~rTN5)kB33ZsHu1q^g zB0b4PuNl}_9>$4U6P)VPL|zYi+?#&aznd^doa456YM*ur`gK8CL-e%Ip#)BO%f1b zGMDg(jie_IO&>VD;{6WY0RavtEiRy=bKJ_OxD*(Hxk{uHHp9W?sK z|4yRE?|hXTnBCVBo}q)@#~a$V2yfRY-GmA1zt4(M2UbX8cm04qm6)8|-*OLourv`j zn5?X_>#O$rwA?fLLQ{s=yt(nl2B|N4{rztAcCn>28J^5y$Zb-Gj)2N{3+x20J0 zv%l29*NSz%^OrH*l%L&AtlOsLDoBVT`ODSHL_&{iB+NpnhdSE+XD+w+HcD3XDRDi; zOyl%)O?~Anr2>VQ#h-og{*VCDkqB9Z>3Iy%xL?PAezbDR0YL`gDfvfk|8IOC+Yddq z*6Pi%3cB$A{p$X7Vh^*p-;p3*0t8i8oDj&mW$w8C{7-N4T}HxAl9h0hXLGxLn9r-; zq_eOpY3WoYuS*9GCtrXuZSP?p(nqo>n~7h-KY1kcJr!*Oy%pwr5_A|fjk>XZ1A!Qa zdndsW^W-daqFQQQ@cK!7d~n+bqO9d0M^AVX3m>&)M4%}nOK)B-{v9qt)`DOCO|3i& zijtZx67}sQ(1i=MVc;9&ZRiC7el2~6bWER!oFa?s>gQXTUwL;hjc?f^95q{zOAr( 
zhYbWu({`Geys16OE(}D%(L^UF(*@)>APK=fLa?3iIZk?#P&A)|rk=!3dx4X@gN7Uf zKZ`$CUw<*;)06-E2~?z)JuTPBGZwfT2vI)Sb0zIb(lpjnd`G!ju>rO)sP60FTZQYmUSp^v|`WW-SnFpkoHl)YUyrt zg2(dz!(*yQv;|e_Y{Y*HQ<(Dss_#L}e;%_mLXb?Cv|VxOl&`UGg`WRpo%wZ_U0ubH zif_tQnM3Z*A9|97J8!pB88aI0?nf>ER!8h4=@)FNs&2V`u5sRh{s>M4=bC1OB<9O2HpBBfv6+K7xE{7aN2X5C0Thnf@zszc6%v z)Um1C&Qw3Cnf?{5ggbO(%Fw7oV1lyUjnHY2ac=Y$NUI2mY^=u@ZY$8%K3r(1=gr%< zUf;-cGBL3k{Q7DiVwg1}$!-2IStY7FnYor-)7Z*Ww>#_Fm!TRr!X zy6R$o&0R}nbwO=bKQqoQQ{FJFe%~vJj=0bp4*taKXKEq5lv=G^yV ztj<2~5&^LJ0ZVJSy)Sv2=@HSMj<-5i&{*WWs)IO>r&k+I-H|K#Jquka%buA)$1`esP-gA}pHdNnliD;9G*lr=Ww ztyrTx9Z#|tn{vNn)ukjf>wU%u^s{w#{7Kyix{r$6-8F71#W5r2-5^~P9)UBT216~Q zE?=wuWHE>s+~5C&(}m4_*~*jib*kZk!`W|d?iq+nKwofmN> z8DF}w$%a7yM&&@vzWKaREa_M)1339h1=s(Np;^^(xi z4;Z(g28NZXciw3qeLKucq6W_9&0ar%qPk#}vDir4bIT3dTfY=x$=;Y@h9`q1rIntp6q=rV{qt;rBOzHe#;oml1@ z;jLIkF8n6GQ0gOal^X@!)vKlgPvHfWZgj3#+oVT@uqcrR`Wb!Keck*D0Yey4Z2#V$ z3guvqiQI=-$J3~Rb06E4>$e;V(|3Skts=IbR9}WL_~*w%Q)0J0AeSb1@nj? zN`;vtDpDNrXGx9(FNS4IEZRv(VrFm9aAJKhGxbN3cQQ@#Ya$;?T6Z|R5Sf0*=*VU0 z1cG=L&Pyr6Ity7XI}-#U?|q9uhr&1L!*I&&(!|YH=Wxre&TD2aX3y*Ir)}mkOdcj> zS9HhKb&F_}$~=@pRIoHb?4G^Ne(WVO0mU0t#sv`IzB)hXzz*^H<$BA&HIrtxh74;b z*LJ)b@?0`4OmB}1hMRPU5n|xNAE6V$=aqv*P{L-wz<67m5+Y6%G zgA0zIy53N}eiXZ5*j4`e6%)!g9^Zf&R*Hny%XBlYzzqztV)x!TC2SSm>BGI zCjxcPcYJB{)p-yDk8?j}4s>4}(ll8j8NBpWyDNaz0UfS^gAQqt5VVq?ycXGkeOzIo zS?S`fGp02CK~Qk4u9D9d(rd}Ta@Oh$!QSb1bD>@<+co1g;`z_o?MkQ#x*}uKW?Rzg zVf{LTmk2JO+I24U7&GvA>E6qy2ajzkuc zl>(K|+UMG^p(`EatQG=sW)VbbX{OJxX@9QOCsY_Ig*2sCtV19u z{iGUebvdf&t=?IcD<`fiBLy91@K8*;e&o+aJTrOM;}%`?lv$O7+#M$9d;YU;;!amv zh9GxE-XC24Ipm81uPF^UjZy)YszIg)SmHTV%qTR~TpUZWbN`W5_lPW>sSrKtE>cOI zK2y6Hb;G5-JNVJ)C@Ncn;=8AgmN#z9g!O*c#!=Epfzg-_ZOJINxlSL^Q$HTPu zMB^xed}`x+()7ue8^wX+j2Y7PmSA@)W?MdlXLMJo@2;4BEEE@zg>lK}UkYwHfD$~C zi0sA^wcXBcMTxb&2ql25ejj*jd%pK($=XG$Grzr_>N9tK9EIPC#TTIW822E`Wk+b{ zoh)T5!%JDMz8m9i;Xgtj=`6Du7;zDdD{COK_To3j-#32TpWktuPq5rZgM#7>1a?>! 
z+LGO>Y!Ik0?YnU+`)0#edtat+I$T-j-yD=+Pb;{1=0J$|??dAiPozek1I@imisXRH zUjn3jt0d&^V4<<$ggd6~r-dZDA0U~Y@pL8M&=9N4tIa}yTI8E~AaK^g|CjPhz)9p^ zCGCeRuvV-eZkGga{zr2RrPKrn0S|}k;7z)^Lq8x*g~8L(^-W{jEtImhci;iR2Y#n> z_|0{UQnZ21(b_+~I-W{(T6TW0f6#_K=)QquUKO8W1|ovra<*p)r*7K;`l~7Vs~Ei| ze;X>secu;v0O(s!BM8}`ZhJ%mhvuY)5O7W^i*`qJP~`~m*Y|z+ z&)OHQeY$45+p{3J_W;mu>G-Q0o(-2ru-(qD>-41fnTW`n_AR|7Cg9~X#SoC%O!0}p zFX5EIuaNaNth*Gw1M+;?=#*vR@4Gtr1>dfq6d~PfPtb zdzdJ65Rnx)(8#JAdB;9c9mChY0dh%UQ&(6B)LKs!cMtg)cDV8zzQW?85!i+&SF)mV z83?9x!Z{9T!#(S?*ncB!5`#`>$aX{$@4F?wExr8h2ATfT-!1g{5eiLsAL1bAg?@(b zDHOf=469Yj-k^0t9K878wDf}#c*}@_U!Gl|qB{CnMntY}G>PdWaljsLXy4qvM?erS zp#e?3-mo7#Md(|9_KT6mIYBAO5-cl*mK;T~POxYHmmpPP?O0)DS|szND={MAEw4FW z^}g@klPHT)}dg9)dkXz!9N~7_xK{Ke7dGDJW0tD{b=2pS6qN+fJa;9X$JW zKnO8@xZGW;XXtgtX`imJ5BI&ot8_0=@7cMmrB`(42^cSc#lH{72vi+Dw(4>QhGrID zJw+z4ZEhos4F~J<7h`sjX6g$?X{xBu@st8i5O>3=Z_Mt}WrrBP%vdbD1I(5se-!tI z*1c?Yxe^6bnh1v-R(PFZhF(|3k4!S81E9yY$!#3#f<0G~U*Pso<6qPHE%y}h3_gxV zH$Mm2CnFa~s(_)*w_f3v>=E*$qm2GGRq$8@eUU!jFbmBi$p|{PC!G^Kxzy|B{b4f3=l)iYK~rMBf72%UCP5zkE1PueUfIi{|mOO!tCTH}I`38mbm7-7Q9w-6uD_#&m$eksHtPrm^gwWG4FjK;^CEE~1(V@u zQ+yVYq4Wi6klu5&n*>&yYmCxjsk)blj!HHJgb$a1B<4Hyb6;@Yg`qZt?uH6ialiV zrFAOvqIW`++h4}O^O#U9s3?5&vWRd*Ov3qM6`qy3byok-X}tie9e_lmOC6tbGa(}~ zUxq>mpc9s@*j6mQ2mMF~lZ>{L0q4@tdo5w}skUN-ETSUO7beo<(cxKyx=Tlp z01ydBN1#DQdhShGD-@N-KibmN4dVzN^w(|s9B~(PxjG%7^~zt>ZTowKT>5;n&9qX{ zrGCkMZ(9n?CKu>QMhN{vIwiNoBpj1|wcXDSb^I@u*#fv8h)H{mOt241Qs3>uYRf~j z3C(TuO$3%16?fpK+dc(1l9EhU7e7^r#{EhOgW4p6^}HGr$ABoy51xuxO%cS-?q&q` zoO_b1v2ERDOCxi~oW=jMsHAiF2hT^5f;GYn`lD6%@6a}kYNCKG4E|z#|Pc3|A+XJU#tMu87;i9peg; zw9p)2+?0Z+wOK^Uu6k&`f{~*`D9 z2N5gU_uO|gX_HV5IcxB`=iS^A^XLMri^*N{#k~Ha+mTQkYZsHAkcElhuZpxIHoSt| zs8un_?oc5e%0WTC`>v8gPb;mvlp_*(|dG|CjRh0Huq}H!nMQ1OD4Ty?dTa=<4wlVIoFs_;J z%Z@rKVXuR7qmVTT-$|i5o{+r?M&X&=rxJ|!lT4QM;xtLO)#0^<#=8?Hbgqo@Y$0{= zSsvLPfd;`CD_slPT(^-zDq>T~tR4=55U9<0CdHqABn$0tXO#@P1V#lxce2ywwn>pN zETZtDNqb+mGBR3#LZdH7){gZbxDMO7 zz3v8FzG}`eDP^$b2J?P`!| 
zoo}^AxTB4#!CM-mF6$A@oI>vMkb%j@zWQ30fJF~m(pyLvM(yS3L|O382px|fwc z91#jN*trXT9pe)HgJJ*`DAIn=$tIn;^Bf-idHCG{v9s7hRiNZEbdvX_1fqqX&&ulW zYD;FSF4#r(wlK8W*eE5548=O{#btvQ*=+OS!@6Hy70OLisYp^XlCYQLb3voMbsbhu zx|M}$C!wFvsdjV!L>GcNn%~6@&|Xr=lBYwQ^hmZ);aOjQvdbepDu1rotx@bP_C3%KwoT#JmMTN5~`Db z|K)zO12lElwI2%HU$El6WMU)bFBtf*jd1M1q&nwh9{;=uE#X^xc6(8apIHY4&IhdP zq8*E&I}=;#(gFUsp$#V;G2%?gw4al=_{}y2dxw)BJ}*~RfK%gMG_fqdA8%;5r0mo< z0%28#Q#JG~NQMj)9dp{;aV&_cLu+2HU+MS5PCs(QIm-pQ6=n$Ja zM0RWn$cNHbB$Qny2^*L+&LzcK1PQ$NUOQ&tN@A1x;&X_UfKg6&6?iJlW1SY|gLqc! zV!3PgT#q7~uPvClv?;;_h%7R)W;??S8yr~blo9zC6-Kh4e727uJ7_Y^Kg|o2oQI}NPP*)Ap!H7*>SM(zU znEK$`_PxwFi1Ot`?-U>sjb-nzI&#V7(Brvi61DY4-8zdngX=PLnF_Iy7g=O)$lH1I zXh)i4eGlna);^mb*3g>5o8BNFe36%eaBC;tQmXdA^+@1(u>1R)#M-x#O-tQLpDv9h zT6fI$rMZqE)aTL~qJ*Wk?Tka$-z|t}8ZG_M9S=`07x@0xsguhHG-sh5NCC@$96jAl~#<6~edd)QUi8Z>Jc^{-H za*53(WxD=-x$W~LYvn&5v$TbE78hM`c5NwRgM~(+;pomcCu6fW{Z-NkL(ku5Xfn)6 z6J1iu!vz;%9=@*L)cjgUS zhKgyh^e52H?=`E$Ec8Wks^gT^cW4Ce)h4SAJc()Bu3b?>lOk6)XO=fo0=r%A3_(t| z`TI8niCyR66q2Cf`6P8AC8zy5t-gz#a+3SJ;TDyId-sc9Ola@C;gKbm zb+=bSy%U2ZgYqNQLLr#5jN{BG!%#lZ#+eF#fl6`s^K@`#H&^lSc;a+U?MsrmGAmgqS)9jR0Ks z;Lku+SPbV7BQ09ZIJev7eWi{gyW#p3T?DxZ6$;2GYVg-fuBFZeSN;u2L-xr6l|4Gs zAI;(;#;#of&8~2EuTNT$?K|XBJDjaFZ&pW3#$2ttvFC4)&%ykB%u2+Uy;b+RTvpjd z5l`KmQe{Hj6x%K;*Ilds6M@Kk_jE8IUd%WklLd^B5m2!rG>WYS_o&2u8sV(4(C~1< zs+&~2A;u|`Hu&}sol#k_LtfHdo3+tTxc#or3+aX~e_C5dKl^loL5gGz|M>fB{uJua z%|;eR3)={EDU6Tw>C=b?H_*PiWvbB~M(&yBUej2(%1%XtvktGY6r`k%n@wBG*D9&} z&|We6;?IODCTOypuFH0=Si!CRsvqx&BrYmp)k8pPe(HWsXI|KXo1$>I_onJ@f~=6w z9bZhWRZEEYM(>!A^VntJIKBy_!qB0y=)or?US73~ht=W+QyUh(s7_eEqXgiv!)|Y0q1-@nf7P5HKfTl zX>D#vI%A~Tv=8Kg0OJP+ZtTxKDs`%&0%DI^7wsluvRsE!vsFA*R(VnrvkPUsm=?)q zp8d~;mf@2*nmLA+TPE3q!(?Dy6$45cHDFeh=Vn3T*D(75lGQq9pPE-BZh+pfcu%e# zJeH?3!zC4|aOV<(#+oLFh^@^$&eBKD={52ugDZP5mr$hL#QV;9u1TK>g)nsMmzcdQ zNDbE|Uo&f_2nTe0i;P(}ZDO4{o$E@;yyq~;WBE{%?-kH3wD}t7KfO!aW=;q;T;Wwe zYK5k2KPd9WHlCqYK9XL-$xF}^tK*36tQfdy%6i`JTW(g*GPrUCALWf49S&)fINdT4 
z?-SK$F)tvh#*AxIj|Muh$%J>_(JbV(V!WhVAoA?faqlitY~*Fs1)HY^qsml#6@DR_ zQI_wV4hr4bqdhG>Z`BGi{-HQ;6CIwidPi2G6}#PuWjNprZa7wN550f&CX>dU{v~n4 zsi|UjtjXwyHK#y1-;5;Gga)Tr?jXlq;T`??Dn`Bp>c{SV@3&H^SryJDA_I!uB37Dqs6iVdaV0n|>sq->7Qy{Lyv< z{nC9(Fc|B5&_53E(?pCiQC3^GVbAY%YS6kqPlM<0RM`DVP0etTF(qQ))BEuJ8g7cq z^pVqQkDMY34PLH`EEVp5Y8dl?1LUw8q zwW}eY6#_s2j!Gj7VYXbWHeB!X?G}WqYIlCaaHd@AB5hYExN%9@jd^2ixtWs(;hihk zXIE*#1a1;is6xG1JyY)$+wsNs8~qP*S+X07cGOiXKex-l6gfi8;7EFAv`!ADEZl$` zws8lMQ$8~s{q34d8#ZoL1Z_Z7c9V=0w3a@?ZiAsmSzurx($LCT_lgX?qDt*H(ryyI zJl`h2nR-{kyz+O}N4RfAbp1e`Xv6lVhO5_>Ltf{FMq2q-lb@a!meSsix1^+_!&NV& z+lhMK+=KIPY<)&A>vwFlS1fc1Z{3>YF+EI}%k|z~SFhJ?AImen-i_b9LD3Qt4tu=QsJVLC`}kZVf}|~zP_{K z%kjotaEOR>roTv_R*Y;?4)>EcAq`?l^ z=BsCQOOHj`W3Acbp6@SPlT|rx_h>q5w&J*gdLC-Uu5JCvQ#YHSb=9`nnuc z_)bibHJb6sKUKE0LHC-Wo8<6Nx?~>VV{%cXW5w5okCc8aJbn*-EM{*1y4A_49A;f7 zYSZS)qPy{wEurwQR(z?b;$7v^9|*2}^UgRteddD|jaDLBf*d;0wP7iC z9RKTHR;yDW{VU?;WA&}Rp=&)Pi&m@MPsK;`259Puk>2@L;d0(@0bC(H0~Gx)f@?7F zkCUKvM&dhIEwxv$&kc46|LOP|!BBp9TVTR`SsWK%UIf5~pm8M3i=wGo3I9LF6_^wy zhtmB+ns#P)<^Vrws+L<x9in@VqSIEHOJ@B-KA5*{+KS;sGq%O&oJJQ@9aAKnXzpmR<4n(O7i zi2sZ&GRxh;VG||y%CnVOc~*5M3_?9)q{KA!le7#}P<;=-fVN?=1cFeZ>KTG<|GeIe z0d3hs`XE!T3?k5KLv&%U7V}holWF&Elpbs>IFXPY3W9T((?>e-82Nl?9HMi(zB$I$ ze9casR?QoWUO3Y;!#25JIrMDCkd{gf4k7=eFoq9|-!DSx@6H%Nz2;NdSIQ-Hbal z_)rndFzBx4;(`5m)`e0KA>2T>ZpkJ%R=m0cAUvs%!agF-?uz5}*b45g=M#RrwYLft zE+r2SH%aw4{4pBrdIe_o4}l?a?EJu5BrLey&T5?m!>yb001t~-WDvI+kE3tsV0lfq zHpu}1jopORenOyd*)z=k)8#(6Z4u$!(i&dQ$#@mFfiSz{&c3m^_6alkJZ+XFFnS(l zMEPcp-$7Bp2z>K2V7N7O%O z4~Q}h47&Py!qL((=t8qg>bQ`#f`{#kg$?kAsnsJd&DAG;Kb}@PFt(+73rhd3RDFbD z?Sa_N`UmMoYAYrQ?hfo|yzz*v`+@>xeWd7%tNEjAf#DQ<(t3=82Fc-QGrO90pa(1R zQQTP*fGR=7UfevC8`)qfOl5G5(izD<=l&+UQ@7~59HW|>Y8d@^blk;`eYtsTgl1b zM`A(}v>YbOS4(zCs%4kN&S`6pY|dBB>{0Le%vI_pF>o!DCBgTAfk#iK1=Ta~VTU5y*a_##77`7xGzzNclD zHy7>#mmQe#99U>;9!9ju#R~J+cM(3xTL; z>QcnB0A_VCmQanFpg1j(gR4>q0io*@%Aa?aLH8)8Fz0xreP@-5n2t9jN)}vNIcc}p z17}hcEj+t>K^RON@xIlHiT`6KR$jB>Z~!D6IS2{)YKVl~RX;HaEOEDs_x`k^gt13C 
z_1?@brr+8&qsQ{zEiJ3n96j)*_vA9{xZaZW{v=sd|NTktFo4(o*XHOPJ=RFkaX;*Y z9(-S^`T>2Mgl2QX^)yM{KlsY!EbLP6H@hf93NAgoL(`7v@3ryZC!zRx?>Ct{*s2ue zIVu~3@&Hj1@=w1j4krq$y>tq#IXR{-9axU}7M@~x&h@k?Nng*PhbYbc>2uuz;!nC> z3S7{a+|13Iq}Ghg?zkZk|0@A`2qf)*pYk=WWEW98Gmk>-Ez;etNu~DIc`CSx%f>>zfI|aP z5d!mFtCws5N-OnO@8Q(m`c5LvinOFv2eG_T1dSO&SyAa0`M`cbJ>w_d!o&N!4PvRe zVPdNeFSfSWVGV{+w?FH9m2{i{5m-V816Z8v_rdL@zh@b~R=}BLj`N|A)vv(RP`Lpa zHGR$10VD%K-K|oS-kL&2-*#dqR5&N|hm53_)Kac^u98(XSd?mv$P$X5R{N)b z0SM_|qk~ESPt*FL=y~1dQ#_fru)`0J>EN7 zF85P@NiAtT@EM1`>~eD!76-J74mif|r4Kl)n+`TAokWAvUaKAeUEiIlhkA7ZB+H`T zV5NE?wR|*j&+M+uua+oT@`4*}wsTKbHFl8n!9!u=PhPoT*xFAp7__g-yso=xWS7o~ za9^E+mn7XNtH;$F7?gVQta8|s{uXJ)lKJBsCx5wU0Ti;*^3ZaxQwCXKR|Y!^me`^} zvOixHIPTY^ta}XZsPJ8#dY@QZpmLap)vAt$O8{-@3|#gy5y?bB<|oFnWCV4WJ`B*4F<8XYc()O5>P2iKZAibP~kj$XK3cih0A zIvIj3SfA5uFeH0&cd45W)`^v(p7;$`L8k?M_7{D(1FuyYPBg8hr$uV`LRPh897Vur z_*5x_P_Y+zV0IYk{_PzUrTe4+>b0MB$u_=c)T1L+p;cj8M3Wb!YykY#14`r}@S{-W zgb>(zYau3Mq=u0WN0VQYR5Gv_K1ZFd+>PBh&<2MJfLj0sJS5ucCAm>6mhhE8I}!ck zCzLNeEEo%@G97X!fMf#4^eTp#UI>ueqG3W5|^*1z(xZTIko5fzhFVeln=C6 zIP6^!$X`q8za@|n!CK<)L|+i42qlT7JkRf*3^zsKu@|()Wis5PW?W0(nxS6XPsj(% z7+gKK6I^Ub7hYgrw08Z->So;vm@~s<*U35dwt>3+riXV0474K;cvl zepGEizCacgiu9}$z+AOrx&Q)jsb;FG=fl>;Qi)q%W>^K<$-faxYJ5o)ONpK{^KB)6 zs!%b<$?b2igJ=L4A%PZirv%GxR*ZfUTDjSZ@WR|5D^I^>@8oBT=bMm^ILE#(xzVwgVA&biB1=nZ|FCA8o)37F3-pgWv=?qQI_hl2^=8tb@QFIcp0Q+kMi% zf}SeKHT4;)q&b^Eh?cC3RVpA6Fann-D?yyl`ti&0FhZflOt=ibMvg*cYDF5k2Oszr^t-35?hk zs@xIY>m9rP59OO#pS%jL?SX`q6oBzA5srwa>b}e!=0RJwbD$Sf9FC>~=(n!`9#|Az zGY4<%zu>lwbnno4M~|PZg8?KS8*%*<8_@{THT?J+1#;MXz!dNLL+oHLS2-3}i>N>1 zhu<(g1RF~**!vfdfa?t7;iTaD1_-?bGI$3Xj#lKp^hf;otQC_Ro)T(i7a-52|b8K*si8`~c$(54VN|<1t5ra^nC1s)c9=Fd)&{i`P$4b$Ga`F)(v7n8+Idg?_<- z+10aon*0X7CJENCtyp2ThQd1$1J0X1xxbvReXg!^wjHMV!5^!=EbRK;DX$KXpd-Df z)9|#RQD^vRb|0&#GL3o)iNdEoTxCyU`5yI<)^q9lYsIz~+(GUx5EqddnjgW zm)HnJ{!&=@GsE`zYUK?K+};uBGBgl5)6ziYB*GVQ^w?gr6)^?t@-8Wt6F(8!z6lzz11{qO+0d#b6I1c ztV~^4Y|J+qBpU~}vEM8QVwClozri0Ny3_mSb;1Pbi^vQkTdujfb6_rgn-T8TVkrXf 
z$QB-129G9Nh1U0kuFLi{IfY<&StRp279a;M>7Ia#R9dkiIpuY@aY&m^rBBG+PJnoH z5Pg&&zDqjNZWOqu5klQz}D^6#-X2ju(sigO|py z11U}fGq#^d;kqQkySaaSMq78>DA=PgL9>c3d%)zeoLiQNru6Ij`15G14V(z*=ref& z#Ag;6PsBl#s{H^3IMVRu-VGXn_J)VHm=jiAq9#0XNwqQj&h|T=Nib~}6 ziLcVT9aEm(0h3=VCR1R!CLQ)QGK<-h-I-J5!r^#F5x#e#3~;6k|GXAbyP+@M*R3&*s>!=ozI$S6MzfKU+|JeiW61Lz)Go1&5}?L?5kCCyPDwPG$uneo81SX?Q4iQU1prEjhp}FdiHzLS|n9J&7Ev+>tURuVsU=&Z<5yU3qO{LZDYlh!fsXh)#{|%0Q74G&pvUO zlIijZlJ#EZZ>GJ1_{2+DOsEauXdHN$JTo)eErUInP%6cbKS&*I)?ll2?-NDpNlx^K zc&nysj+=y@V3^}o*TN6w0V1$39PNtHEzF5`n%!3$l%sa*M1tsxO}e6B3_z zVVxgXG0~ZPimzUfJo;uvFB03-od0zN=#=;+9Dmm_%^rZ&LbM&!a1;+dU+`H^VD{cx zakO3CEPuXF!WcY09MeKLJW(2^MKZ7DelK8*#IqBC9dDk&0PNOubo$_9G5Eqdab3z! z^Xs~muQnnM#?!eu1|=B909k8si^HYvl?VGFH>&uO$AooA1J1LFbsL3#y-y7WX->lV zGY22FNrK?Vjk_G$86bD1K1cKv7>h%&cjP(%1AbSD5w>G0dzQ!(`b=!#MiEnRUGY)zUGVNKYI@}jlRo2hZ;Qp zi>u0f6a$Rm9sr0%w@3@1DR{XjU0ZB16&^@zdvioJ#mhsE7lu3!`t`aO@5ttxhJIf* z(9@~=cG%Jfu<(-32)^yJdf@WMchKE>UQBitD}fN`>PKB}}LZ2_;^f7BtxMwNvJ9En*8Br)$jB8D8D7n zC4VTN5iYGDtX%cfK=?D=8vr6=RJ!2k;jiI{|E&^GR^P?*s!R8;Fks+OPxCw2gZu@J zC?8aI+CE@5!LA%+DLvgp+V`v#i3%p>P~O{Cn0n!iBtUCkVPP$`%JlICHI3j&tpRS% z*!9jD7mu4==AtmtBDw0me)s*@Qlr&ao8b0S`(Rh$kiy|gOWe6Z$9r)V8!UCqU|Lwq5W$ zujDQ-V!?}TsH|5uqDntZLiI9Mm#p?f=lM4{>po~baYN+rueQa@%XNM1zHpE5zc_tn zRTdg>gIqVOAeNIpE9+6b8foCgNuU|8D}pj{IW5Z9>zs$0&E7}eZMNwxl75M=zUZWr^$u&rI{8yby zIWtPbdm)23FA{nA_Ltq~?}3EuKf;sNa$yfR$VlUVQSfPqim5uG(#U*jzNB z)aBBOUo%3;!BOA)pZ(vLC7FK{c(i`@rSiFZAQSi(kxB<~e?Vm5g`B+x;!}3(4!>uQ zq8h(bQ;KzYtSAG-y6+h}J5*`%+BcobcYC}gIw4S=V@l~qe&J{d(8C0moh>8~Jdy~V zgC-RK$tskYO+tO{Szu$(&mYBvYkyaj%^UKyf>6=Q9}YFQ7$2 zv*#LJWhUp63QMScG~7qf0d6h*N1Huf`Y%MT6pF3AF>zVm>BzTa>lK>4#*kGtTM<5UDXW?tSj*c09N^QGX0rX<; z_53g1dLl56Z}zTF-Y|oTcYvJcFb`Di)|KY-I94QjAsB zul2)@#-m1sA&;97x(@-ro4PyahOP^Xt4;_o}7t-%503c!()pPsF1hT};rq+{JIYChXpLV_;QU#b!+BFDR?#?|Q~f`y zn4zzt70x6Yb|oCXK!DI&Ws^Seay^qUYzF6i_aR}I_^j8}SBI_{QJWi z{2{f!r`t7s{J>cUW$Ul=uv&0Ea1N9u9SIt?xubLWe9ROwg)jJxf0|2nO}(?QnD^NW|ju2X*+ECW=~J2)5N8 
zzi7t>j?Z$9(Pj=9c|M8*PJy%XYXsD3C2QlhMDuMEdz>4SI&Nq@uiNt8r?23*2&sa? zSm4xU%VcEClvQAQH}^;BmNH__bQT{xt@UcUNh<6PDh5%40?OB2(0MAq*1iL* z#rVbeT(T(cmrgf}$Y8bCNT3sB56=33rmVuHEA%(>JPSnhvf5lZZ)@}BE|XK>{=#*| z-N#HUa#unW2M1c_>3P%*Z)Da9sL4%!=Ulwx-Vd5w_~4^wEh*C%LCqjkM-$hQ%5!#~ zXyllZTSm;|+scKw-uc&3G}`tl`l^lpOz-a_@tZwxro8Oxbq~-}=WN9VKv8;kqT``Lb7MtU~W zhxA^9_Lyr1Q$5atvnde?QZiR>C}p(b!jqPK&f1PEOg(&)hm=hbcGSQ7{(;Zqs(UCD zYeo^k(d#utfM52cA4I`w8M_ZRADK4rw$irHdA#geoJ($b+^mM(99^kJm411R+vYdL zDhn+wL81p>R+iYCYLhDLq!B>P|^?-F3%3-^z zUU}X(YL<7Pgc=vYAcZBs#B-Y^gosEXu?C$g<6G7Kv2vb~syPwqgB7$k@fEoArWs zIWmi$5xxKuBrJMA6f|0v;|~p#g^-#GFQsp{`5Y67}-dpOfOF#DqOS8m5Cb4 zXJf7R7;5+z{&0!BR3OYolT#$u>1K{MODgs2&_lnVqp=aIW7djn}z>bJl{9p zKjX}CoH_6N?h|)j*L4qNqwz-|9l^V_kfWV#y^O8gY3mn97VIdQlTM2<*g-j#uYztx zVtfO2LARoStrEBTx3_zGBvIUzCIE(V7$=%$YDCRX&i?VxbN6X5IXy9vi)((XZ@Kla zK|3?MnXFf`DCKJ)DP!~ex3iS3J8d7ImQHwa(_HA=rS3LRiAmT#yA=6tYZ=%>f1aY>6!!^rJNL9`wHd$v&w~FNTz!^c2y;iGGqbY`CDda zpPF;j!82H%UI|A&vf!*<3}5`W@U$1BS@Isf?E*}`nnvakuRwLYkaV7$I2ARgM* zs%-A1>sLQ_)H(dV{Kp;#Z;zVM^E$n3$a|IQvxG4;++L(dd63e%LUp=f&~sEHtKqaT zH)8(mvq*RLr1{QyplKq#|8juV%Qm|=G#Ii=>pOz8<&%9PhU^lZ;L9@V_du^>H#{GZ zk4w&|7Cbd+#Q!_ZVfp?(_#t&ePJLNt`1S{S1q`5Tkdx}0apz}MNg+d}4(X$$!rf3< zlGn*Q0YD9zyQwohJ<$QvYlhi&Ti0(6GZlXJ2o5Y>XDj~C9Mnk z(&i0O{P<$CGi|ML?p?A7d$Sk0qjSTDkNq>@>aP zVS3J=w=DAT6T+;d;>{jTrBYof0lvnEo1lcbA75Pba~d(bpF6$2YpteoW~>%}ouNEY zUwn|AG_2%|l4S`Ma859vxW;L})ah3Kk!i%&4C3yn{Ijhg1(`@)X&om)Uc~I+x*$Y3 zb$9iA>MOFu(NAC(p0(%rg170X*Ph(MS~d#0!^quLH2*2TW~Gj*{e#iX%AO_)zk8V% z%c4&zt*QR=0oxRM`Q~>*=h(axT~Z0k`}EA&R(30Z&cxKEeI9IKr6zHI=frg&1Fe81 zT(~Z)IeX<>r?h*6`eFlKA|EYubcW=_f39R#I6Hr6b}aLJ*j3|TvvLhjuv2J~Rs50q zVBUCpv3lm^l2lA$)qa#f)rpMqd4GpQLqK<47VmLK|GDYUQ2eNe*Ut`66BE<6h(^F- z&k>j*JkqWte4I94dHsap{-yZt!h?SQIw)wDDk>_{OPvr!%$4AmI-bu`rj@M{7~6X- zSO$6!HI8|vx)KZe3EAlH9p?d3iIULpoivyEq!f zv7OxHH~i?v9##U6aAlL-NlK2DQ|(y4PuL7ov5rd7u;=CrJjPx85HI;lg-AZ!%bI6l!sc-IaECck{bDfiiiuPy+q+31+C%5>=5q`YtdEL z&fM0=zPF2rzw}(Cs=RgyA*0-PbUit%9-d@M 
z>wP8NK;@WhebqK(P|lt5Z974oI)LUa0?0NcYhmlZc(LHDFd-;wgKM+z zMRjj*n-aF)Z`OA!>D`$**w)++u0@r)r>7#9s@clkIXlWmH)%GU$GS>q3EFip&E%mh;nK-C3q8qij7A404h$-v)%Zytu!l_lMy=J3X1zzk@Dv0ld5`EZ1 z`Pl%*OxiKB3B+t1SWeG#y}UG((v((g7b7B4w`s{=vgNUC9IRi z&~rz5rEu`g-V94FsKTk5p;Rv|)_KjD89FUU8mZB4Nu)8y~z)cNWR1tX*l= zyB?_u4&n;O5`=x+rle)cJ1et&;qNtRVEXZWL;<%-Ci?+Emn~-Aev0*m@86rnZE>X69rWY~k=URVwtM>k*mYs^9!G7FVG#!DB z^Y-N`&jqF3)V>BF-`RjE@uT0#&G5NHh6L#F(Xibe9Lkx}(?qmvJyB#X^jqVs$S=gC6Tr`=jp zKL$SB^o}d~a+voEnIx;u{d;FxkWg5RB++2Q>g$yyQgw|9owS(1IvwS7n+q@O94(1v zEY)KOolD0;$Y0JGYZbLO53QCz-&ds=fBfL{2*YDgnJaOMf&b-LhF&1>x?h~z_Q1j))O#RQXr$2w724Y=Su-v|@7dm3OI1Xe!Trd3` z`t)*fHmRkfgGQR^h&ImUpfOS}TN(4SW2&B~qs)sxPc0qzhHc&UC#wl_a{U$T!LhaD zep+htwsgsHnU==Vx5npUQ( zy_ECG4Nx;(_KF zC=KP78R$KKJb^6whBooHyA7FDDOrP_{>7ZBHo12N_SJMtwCy~JIu+uZ0WqtSFbIk* zMD1YHZC~+13OiYl@Xh(&oZj-cd#Ft{pcH1cofPa6D@+!du5#=N2q7|&uKrVE8}Y-w zf{2Z6@%2O5(j{j`tq1ZZ_oh3`-(H0Cl(ZMPVI`US*3BnubCNr`^gG>e?+FkwU0OVL zY9INZSV!)vvRV}`rEV~%^@S!OLby%MvhoQ(YxD+bdHvwH{eoseyP!1dpMKg{xiXZM z|LCy^ws%;qOCEin`8NeEW|bE|f%w|9%u&KW_b$Q+seZ7anXt!vT|Q_JtS&oo3-2ls zo(+ebxp$PXG@Ig7WU!eI5R5h3GqjX?e8|(C>eS{e)!uoZ*~E2`5lm_Fa@t4m=~uLE zg814YkgZBT%6f{;w(Ch|uCX6#d=_T{lS$M0Kmp)XtoNUe zrEX;pa9z+d+QFL@KCC(FzVmuGnELpy`AUlWhP*%Z)AYj0o6lcA;!D7mC6Q4kUrBkt zzKXWi%_jbmT&PUV<*t>be$k-Sdwnn;eo=AK%;>#3XR?>ylR;Yev{}U8Uw02RoF9nh zaZyPL=gYRE-{Z z(s_(fb%mp}BkIqZEJsJ`cI6$+PHQylYVXtbh&RjVkbQ}+y$;*_g_c$f*LMkQ;X$5% z0d^_>AQ31uQ^@ty941arIp?0VN*ApN;nbSBX`t zIZsEe9zf}We3+R?d#*`y0Bab6_yumdtF;zp@hGu||g z&}Q$>_o!xaLk*syIwK2w^-edAIrIDGRD=&asu*3|h84q?YpGuT|GyAa^(q1lnhThm zCR#kOrGKyD3F>quvZx5#ioG0Hu!~TH;P-Xq_t%L+rYHX~^ESR)(W^Gdxn<6Vz*P-y-FjVGklP~yA@2L+e)rks-(=^> zZ!bGD-#fi7K6Hogczt*G#(qceY_h|eM@XqsNbY6%DMJduH*zwAR>R3+?9OxFvFd#J zFWi3r-@X*FFEdj1gF?WAlbBD7YVjA%D=Xo~F<9Tkg-t5&!C`;vh4|yzN;S1%pQP9X z^vy#kX)48z)<#^I8Sy2)^E7{@8ny*$3;&fiwVoXILikx~l#7r~r-?JdpXqhD-8M4v z2NTcoyINnIhED2N{v?TZS}Ar$ebV#@vO_(TR@4Ed2@2z=@41UNoe!-+S#UZn*RtJwTZaON?Up&ZX(ei zDCrDF%Jg5M2%w8F78kO?Pb@E4WVOf+QGwNFaj*~vQ!@86KUp<+G;VDd0B}vqc8ue$ 
zSys=?9;NB+MxI!?~LUnSLSKWOuLA_LBh5Irk#^va)V8a&w8s*M&Xc4LB#{W+*& zMmS`2I(i)?)^t`}n*gY*V|%njn-D+~w$I{EX2awB=UqP$t~~+$$YWYi9q#9Z+9h8`B@(hciI!$dHPZ0Sd)=0@Z&5jj3(L+REt*^-7(`A?x!WDR2G# zN}(48%FFy;7Aj1tva$uX9ao6=TH%)?Zco>63N+;5>4leqeTaFXm^Hgm5*^uubx$Zf z#_3KuJFI1=$fOg9ai}}bY#W!nW*mE&wx%mMwkk5iIY^RF(-^)Lq3i-Zi)Hj&VnfbO zT|>S}l+E7k_5?V_*KpWNAe<-jjF=tK=Oi%P*ZQyFUh{GsPbY(}Tq4M0!e8c41=;_s zVhd)NP`G(*{Fa_yvW{lSI9^_vv7Sn0FUqo$-{VAh4bRSSGi(l@nrD|cQy}RfwOh>= zdOGQVxDhrHi#}+Py_Xnz$(k-zT@+OEytW$Vu9*4AJSwy6QldB7olDw>1mi ztis&L;K|Fp_DAuo)AQHu#B37g?%J@PpVm&M+PisqK`XC;7R>J*Gv=e@UnNEggqGKX z0rFRxfpsFqR(H^o{Q|9in5JhkuVHa}jj&*7Ba@++bJAj#t|7b6g=yon*3l#Nj89i)@~U4I{XGO@_)v@ zHbKrY@T?n8L_qh$6+ZoD3Ms>z*jybqkzsLUyoV&Qty>K%B|~l7>6+dypG6WEx4^?D z8RB0;FKGTf7T2s^?HHBx{^z;9#k0z0Y|H|j*4_jQ?LX12c@1D=Y?V6ka9Rb_PI#D* zkzraYStdwR?ESrrEn}?7^4E1FHyd+92sH0&HuGam5ILO7jvZHf)NF%r za_aCMEy}Zs2J>(SFD}gL9{y@Y4StFRivh11?uW6K#dp3##9cb{-94yaHAdf1ZS7Kw zv=)xtH)=ezL{dQ!w3Q0{pB=GOLtv1W9hc&uFQ=Y+74>Udo|d$QBOcG5G*|ioy4ce3 zfp5rM-K%-$Ing}fUt8KB6P?wSm(PJrt5U{AN<+rfN`ZB4xSX}?X* z7`Q_X^322d425UK)SJ!Ee&<~;`$myS{`ctMWYH;2tu=yalknkR;B9z+J1P)jegHTn z|Bi=!Wsq+_EH9=S-+_%GV{j(k9%s{B`am0!=+(8LN{=J)W<;D4QcDV#Wb~=zMBwx+-t`B4;C}a&)~$6sT(YHC6_qL~Ml*fhZUd zkDMxB7-#E&v6AQ(nl7z4SFoq+j>uShEcV9~&SrtfaWt&Ghk)rOPpVgLDdzdct9~SvFFE^76bxET zspK~|TRS1=sUpwZoR1j=-95mcz2P5eOd2WYn8OMXy5%HT|2P_-z`q2Na~QI@j57|p zvmR+WnE25;9|(y~*YU0w?JXg+qLHVB8xka}{4acFf0peV*@$$gJB8ZZFK6KG-Wj*G zh_UVD(3IS=RR8zzxD6N*JiIp2cIFxd*!xqtG4ZKVnX;(v>WDavzpp;EU9f(>nOWL% z>EgSlYZ^i7L-1k@KP-YG4Se?9C8Q=O%F1<2nFs=nApo{$qG=`{wiJB=KCl1d^{daL zaUNXpimUm`HV#d&xvy?ePkU+_crDK@UgKlyjwdqmOH9Ew_9UsDg3Zr4{0?G=FA!OR zI;w(Ca3NlXk*y`omYq%g#Rw^IuPnDPs*$ff!BQVw(D61$LM`Cje6 zK{rk3{C89r4$K6;B;o%YeCxdPlL@-!?!)c(+-EJ8J*HEsL-?y8=8feS@E498Sh95v zO+5T7PWHc^>lP=ExEL1i>if{t1U)dFEFzjB! 
zb`^dJ)B%A=$oXfWzphW=P-QQ6F(C<-og32UlDCG}7+hwW=V+znaOvyzeITH->GITM zI{RQ(a5(Ud+UU@y1(RGCmxQ%zrg2M*W3#Cu-A;?EAFM2ES&$$p=l|~oKM$Jlv##JQ zSGH)Pc!6PAAQCbZuML}l|B_#njcqNJ215J+C`0t0BX z)f!1Mv%WDzGLzG1Oupgkd$)U~=@SvFuSn{odi9yje^<@u$XrDGzW>#c{Rcd_1)vgt zfg_gJmNWYItj+TS($lVm#X5viGj1`pg9GTv-|Bd{xBPfRVz>JHnn6bR^BQ8Pu^vv! zTqt;n8<4vD2mtVP7&St0gFXx=02xWld5H|(|55`{*(~<^&kTSTBwc(@!I?wFQ+>)3 z39Xt1ioP(SvD{hL5%Z@$v&u4(u7Mt*nvav6;4 zC|heya|PRo*9U@p;2JT}GyDCsim5eW&`=m_28)nJfi_juzP~){wEbtByXw|`k4lwg zC$BRgJ)!c*3(MJ0Va0bY)B_slCgb}Z&JP53fVS@+RXSqb%u!3Px>Y`wD9abwN|wbr zT6G4VS|8n43if!}NgiIg?B2x(WCC$TMsjsyE{qaACLx+t?T;U^y6;~ zjk$0IK%boyFP@I_E$q&|g~lDeo(Un(`B1CF$I|To;mj|xoHC;XO&@^;fi)06?%pA> zrEA6*-Dg-JI{TesQTBS1Xdy1<9Uo^zPw1{Ooj!b)S6+#c089BYgn`-PV2rL@ zTbH8RZldHzNsKpF`zfHjIe%<|{2+_TP+V_W^=B^K>q`W+|GyPua=@QY5Xh4q*7Ehw z4Dopavo)dHZ^1|0Px6M8>yn@MrJ1(`kk|&F@BNs|8ef`f`V_0&LcRz`Rs`o`=Vm(y z?s)pu3jA+XTBJwd_^)$J9+|f*6l~f>S!@t1TQZfkwX_*jGT>~+&B?Jm5Voevi^i@0 zgaihqq_X@Q4!D1MQ9{kOecOuFXH=WwPtURk*kK+-O231)4-KeE8a|pws;*STuUU_( zfu7snT{xsW6Yb@pKj4t2 zKJ)$QPz{>q_J&%QCQ}i%DauC8Pi}N<*KMqNegO5%e~VC@c$)hxF(oagGgn}n@Tf1; z804(qC450AzKHeO*$SVhr0&t}0pR@BTr`MBAdpU{*K=4RIord`_bVpdv4?aYn$&a-jP7}4jGNfhUZ8^(F zwDI!84%-z~@*tU%CUI$(TcirGWjCW7wLfil;^O~=%d~?0$(kP)65e1f0(`>bsIob# zGE}h-YdsPC^NJwQD;6}HB{R^!=TN0lN?r3tM9x@5wkFPNv*Wn0^9C(-`EGKo(8xT2 z#$d>UQsMnkMDKpWy^SF|knZ<&2jJKIty)3t>u~|J*Khl73R}ixYhmJc%}?WNFHatU zM>o;~%q+N5CcS3Ga;J-(JUg*EmDB8-SkGcHi7zg0$`X~4+a~R97f=yrA775gkk~wN z3bQn;NBxL(Bn1vaYxk+SuO8n(aXGI+Tvl{iKVaHp$=mA;_lk=8x9_%jpgO#A#{$XI zjwFc^%*JT0cOLJuED6du?mSW}?B1D$QTGe97i-hE?`5>ghntst-!xR8H!K~Oe{-qW zAPOfT=j{MwDv?EKglbXmQasPX{XhADa)f*+%5?E!2!3-ec#fsVwvY8RTC+s!$q`xD z8;mL2oCH4A%2!A7kS`&saQ#)aUPM?4BWkxgG{ij{A?_eG@-=s@wV<(0G z4DI}vX8qR7C=wec8p~hMHCM{1xGBH7#GXG6@!m*95OwUh!KaOaq~A7&Sbg-X0V5VZ zHf4wH_c~m(V~`JMD7~5yNejG3D5mD`t#Rq4Jnr8pN%7tS&NkgPi+Mvs-F|bNO zS*UUPjDJR(Shl<6ki1s4AJM0>tLBrvy_>B-XR>AL_WTAY2n>?tdTx+C)_yGGHJAaQ zN})Q20xQ~%H}C)VJMcA}zm9*<#JLggXwMb=BT<5^d*_9Dq3fM%0st7-)X>~*S*#}| 
zqKAM@H&jp2D~I#jq`J+D7>NP)kYTdQ(5ALg5r0N0o!QdVUwxdkv}>Bh zs`WI`=Ihn>Yb_k-P{=hT)lDSy@xwk4?vcI`j0wP=y?r;A(5 zkkERx(S3c;O^TyU4MZ&x-$1azKwqsd@C?r;e9o`sjsnwJcSd~?l);YEV`|LI=aew?NNs`t_pb-3VPL|G{Fv~HPsW=95QQR{(OxC0hX`DALzMZuaL^AjFi zU(&=c;knSx6^SPx=O`V6?W(>IIn>a~nv)1&<5M!N^>eTuai9ft%PN+(2MXn;a0V&7 zXXa6*pgJDdGR*TdR+1;w+8V)!iE=7ul^osvodJesAWaTA%gi6g*LEMZdXRLA+MW9r zA@SA!;tTjaP`mT`Nf|Z<-bmY`!mdwY6HM>vRVueaI9Ntkkb$mwAZQ>u zdTmv1pUf7Imx2X?w6Y+8AiF&o*{d+o$FexQO!TK3TSphk8HG0KXIba4K;T3Yu=@%u zIn!Q9%M^XKTq%;_j@1GI_Ga12ZuXQ1el?Wz1?sxx-$zO#>HgOy8LzC|QR@x{cYPQ# zbUauF99pE5`cji$4fq=z(*0o zrcOZGP6(Ow+GRyUn2NRNLUC$BJA1m*Xa<3CEX=Y>X?%mSKE)VR@G6#f+6fTb7P_Gu zGfxAXoZ&4akMKd+(EpO?KrBM{W#WEr>LR+YaMK) z^ap_Fm$+Q2plNy1V!IJzEIr-~`oUf?R@X-FR|pw3@NA$M>rsb&ot(@W^Um?V{2n;K zarm{=Zb`x;12ZL8Ch%6*?3JEb-r80w)oc$Wz!05)@x=mE-|1YpyCO$kPPK(rYiZnW zlry>!aMnl2Y;+;Q(f{PMb6LQAso&Jxia;EnPnT`6w+N3;7o7=af2eHzjy!V`NNN$0 zxiCkYIFkPMvmzaT8`5i=T0@aAS8|ka#|d4uYUv@Y=uJ7=zF7=rdstZkDfIC-nv~CU zz>I?XMkbc!Nup~2S+8-m?v%6wlGXbC8t4#ZB@Aw|7m`STmp(*km#ta$x}ilSf2~i5 zfu8tF$Q-VDNyUL%{xTcQyC3d+p#DC<1^KTqA4*e#O{+=r3jANL;#o*xbtYANzP z;yf*CC&p~ulXsP$>mF@yj})2WyS0Gyy>Mqd^rg)m^M8`}FM~sj1)F)P;1u}JK>GCl zqr(`eJL|E&Ow=;gWCjH#xA^OL(aYME)4ekBJS*M8^eSnc=f>k*rY6!`{D7#2m6mSY zsR_w;m*tWDp}exVhj5(w`wPpuCFrS0+&RIi7yP`RQ!P(QQzS4d znDcmi!d$zL%V5Q;m#)unhez%hD)m9idqvkWLYDsBwfK2yhcSTnw1l-qmP&`4BQrrq zM5S^wbM)+2GzmJ3bLMSe!W&>6S*D@kv$IN?B1d}F3p3nVUaNG?B&-YKi`ns!WKge? 
z`ww$Lze(AwEauh@n0-0Sy~Te!^|85j+8WR9+g-=aJE2}5jc3GnyMKGB-z(R7V*CN# z9iV-I!k_+-{d22i+q}51Y{y~{b({AzNxvptS@%WafZ@*wm#J zIHU^Es{XRo+BXvrz@EuzK+l!bna?8WK@Lp;L@PSNGeLMxa_%XG0MJl?O4!rW1?e|W z9RDI7gG1PaG!^5xg)~Tt4oHZ5NKQV_H-}QQHp&j#or$(ZP@p`_JiZSV_ zTI2LO?a2^uwcGR(2XBi7bt=;78KL2Knui448Il@`i<=9R5464b z+^2PrwQ5_fvQ_LZHY<><%-%U?xNd+bXKW^^mun&#-bl^q+8O7ueb#$EY#=te0V>E> z@9XXinaJxUE7AVAce5@U37yEZ7ia@uJXqs%Zk_tDaJ?AUPRmD?~FJHe-aZ{6}+ zyjj5wgYf8*^9Q{sSiGMp%e8yoWjkhW-_L~-*w4NM*FZNFGc!oH_4I104!s^; z%MO`NAN;3Wr;R+bo12XH&p^+yW6J%{EMXN)f`a^A!oPxk?^w)E=95=vDGLU@>e?)f z!-fdX;?4b7gDR;m9V33T5w`JWN4Sf%z)fJ$KtEB~WeLnknKKViUMI-P!@4i8Z!zx} z6>w#%{Q(;I-G>ZmWP_f`M*Lh+@-UC(nc6~Z8?4H`hVH;;)mH4>GdT#YzJlYu6k8U0 zz$cyIr#rYk{+B+?I*t=QY0786=nNxO(=H3coKmb=y+DZ3Vxflmtufb(T&%?!5KNkE|)awd*bMEA?CH>e-UE{53q*$!&L zfbQ{^+o02WvoR+>>J}5kPkDs5d3v0E-7sAAWY|Y;SWuTVUu$1#c+X5Vh^Wkgx<5+< zC|akmwJ&ma1RCG1F8PI-?(Ra{XOVyIb+zR)lmoB5dRJl{PpNL?iUA_8t-NgrV>f^aw(-u zYfMw0F-aX}ALwibb}%vAzAQ`4Xj|K(vR2h&h|WpV3#(Ec0(v-+=<>f6@jqGk5?+PS z4n$L1(-{PUXikL`B-t6T;@=Y~yeQ#Y2(QzR&Hu`snJ$y^x zj10UhVvEVOmCg<_+Y1tuM9>t)+}rg-Z^6I1eWznA(`nx~z)%+LAD|ymoOi(X=4HaS zHr`bU+OvwUmLnU21Qml%yEVP&x3tv}KwOA`g9>w5Faa)d@woWDlLx`t8=b(Pm#%gC zDK81a3Qs$M4nHP1VS|qHjbS7t@+~ldU!R$=!v+-`2$KEKe16z{+8F+0B!2SNWKmVg zEotL>7Kb;R^HIx5?*pyQvwrz?3B1mi#@E~L9wy4aMA#R`@ci~^fm8)FV=N=Ea}s%c z7yAbp6I8txg6CFne%mIhPv*m0LQ4{Oxz{22|AJiETEK~%Kxpcj=7|^w&Nsq=guM09 zP4DhY^tj~j+ygguh(#!W&qjW`m%&=Ih$7CG8ubR^J*)X~S3i-++joe8d9m>NCI8=d zKAPWL`!3i_;6V9*e*Eb8ri0sC%RixZU<^+&A_UYrf1QBp6&OHx3vm96jg}aO5$hO8fu(=o<7NS@5IBpnYxPQgyl7fWt-3SM0-F1qwl!HOGaNl8bbR zF$(gBbB8`WTXYs>z*f{WKyw;2>1@Q^8S{u1t!bRcqLF6Ek?spbW?$^(9TxJJRR;nj z;$00mQ{Ffz&G`p1L)?L3wSnyup`~Q#9%wK8-7vD5?;$IF9&qKEb;+onnb@&ec?9a1 zqmfa`#zz!e?x7&Arud)r0YYNcrh$n9Qx5A`LpOfCU$O-?g$5E(AqQtY^o;}2(O9~Q z?RIqepZh~ntif~f5}sEs0OZzm?Gz$F1|zZ$qalLxNi@2mu(>4-P^UrW6dADne2z&e zB&HJ0YDrJhvLjE2`rjk3hZ*GrIOBGeVrwmFw}!BPy|3d9$cJ%RApZzo)J6jaM(QQS zoEoV)*SeKVOpY)Z9vm-i^1tszQFyig34bO1^lD0%H%E!hN)*w*4gDF%zC+7U0tRJS 
z(>pDf*)SeX9)|KS+lCbYWAp41UK(B%S&W)PRKq;TcYEx!iL~E9dCE>j+Q>c97|ZB-c~x0 zhS8SQ)ZimKB+#+8m8uap2h>vmcl(i;ulV=3xN>C65`UZCAY52Bp5ycE)BI;2K5`Us zi=aK>ZXx}vf(M1FZIS)F-zc_>9LWA{Ff4qX!rGsG3Hi`ioMn=r(K8t#{)g;R_DiqD zE3T;hYi`e8D`KXF`8>#dvG#lZAto3SLL9~l9zr%v9$C_ZnnD*t?0;)Wg3a0OWp2{Y#odRTd2VE z-xjhOoza{ayEQtzOjzY!V=VLbRtDvt`Gulcsr8L0<-E>AwkHts|H_&ZXaGlcgMFKaSjBc)GHu4Q95pWq;inz-Z|N zautC9(dK+r>r9j?Egdo1edDG6YG^GduO#TXfec$#>76M55&!V;TaCB}K(Ou6E+)zt zF9VPcJ!F5ntryDaP`OFl4y?MB$6a!4()dQCLIe#aXRfcNrchSu-T~G;!jO&hFoExB{nwpu5-FCi2EKgIIS_Y! zefy3^tvxjAxIzF(`u;ngS%6ZUE?;)KQx;r&-@F_37K{gd902UghDzGG$?zsDU@eRv ztpL)c2+Ni4p{{U-8*&tuTq=JBNfi$67R)HCXN?lQ~?=_R%PqT z!$6-R|Lx3|_wT=THU}&@YZH-TPLlyyU>>H@O(Mv$>ykAjb*u~~w2_3KmXFyVH$mNo zzvL!qtC14B{Is+^3f*tl=sp+QRiy|RUU9@HNYFz*hRt-um$&d42W9z^K!&$M$S3V_lEIL>tC2Ha$W{kMcN|TDE2H%3yY+}j9~3R zxtSGLNHlag#99}>EPC8gcKBjs;LB< zSpmaif7;m8T^Age73E6^UMMfDpG)l!eKhWe@XMi$Y6b%o)8WGDrbf2+D^4MogA+{t z$}7Y;jKw-SY3)N6ld!?%5)@F1cDfVCaUMPY6qH4NgC1Qdi6=;;nV{-wrGtOJoYa7d zQGZ0@cNgV->@D7~5l%1-P%F*5PLFHJ}{;(ck)ho1FsP^ZhX zH-qO7o;}z|1hswDRrPrqp~~=Lhbrdga3UaJxCGVHo2MDVW>!3=aY?y@8AuyjasG-I zk`Az>L;^YaQ--#f)&liM6Hpt@1{g8W_-$?9&-ZkAfzt4}z!r2XDfs$p4>~dDNv8I7 zX}U#m&Q(GGbNE>xwU7qTJNoxcc*V_2=RI)8RaeVH=C>+tDnPnR z(kzYyQ~~>dwWkb_o7y^GmA*Kk1R5mhTMer`=pO?lmsm@j2WmZ6;4J!iXTI;QM!OED zA9yDW0_hvyTulK7>poxco$&6hdAf;SlJQ7wc;s|B<8walShU`CpzTa-*aZ>DW6NUO zSk(mcCdsJ+6uMgyP&-zQL-~2d;-XN%${3=IvaJu8L;(B+Kqm0G^OC+YhB<0;OfpuR z_2=VV?vOl6#H`BiG`D0xaJQUOtQyBe(j4qSHU)Lzb!k~ZD57YR%Qt8|pjtT`BnG!b zr{p(*{{(L6MmD6CS$TIfsl0)dPNRWfTra|^Ys$nSa9b~3uh;D5)~GA^t~LCoJH8Ub zYZc0gsqLk@GJm;;N0L;)S^7aaR6X`-v53SPk6jQ)6)_lMOF2;_#ZCEGlvddCmOcP` zZ9y9Rh87Lgcd;>Xi(gNR=>(IWo_gUAey7|5zDGeXCF@B?gAl)&Y(qV0K3f6pVX_T` znI|WI$&|ilq%)-33EhBIu*|I0=>lKNL%&&|)3xPe?yY9+eQ z8Ek}T`n*lO9*bNsx|~DXDHRAXl^HtRtdK}qGjXj0(}4^0t0)Nt=@n`o`-ltM3_u#& zCn(+nRP~wsm$E#I{rilQU*bOk_(&Y?ndk~DGj<0oo9W~W?DQ1z6JP(u2L=UOAht!1 zdkt_EgyFiL_0`7*-muT2c`5Rb$QPvuMW^O|R64NueFTzcS`UpJIy=G1FsyTSV;7{K zc0L}Zl(bPb)rO()M`DANc7-1SdU{+cFy8CAWGi|MSq%C0{}ls5~0 
zWOcvTl>u0sdjOe^Um`K?TD2qA1%xgtAj;JTzrecBM$sugcL_pJ`@}eLQ+pWHISX)r zcUhi3_K?Y&Fb_*nFe%9cW2UvTAD^Z7o*c0l9EBa=8=TUPbPe$FRqhZyd05c+5ZTmO{r=%AO*2d_$Jl#5^b&1y?r>UdqQmWE z{DzZFoK?|VTUi7Kzcu8{^A)_yO7wP_fH$GM5A<8WJEsOc1(lU5?>~S0I+q65J%ZPu z4^;sx_?_6s;%qa02DegjA9kfGOmJZW`>sx!E8H1~yu|xna6a$P% zg!%7ZP?Vu|qJJrdp+RF?;qDDZkRNLA9KTpx{7m_7+zd!-zY4l4-L!XxnQ9z(ElgvX zp=;I$b3To;XL;oybwq53jx!g)80h`_A0Tb4a9?^{xt1o%njOUVh&W`SoZqu?{zVm* z1`cW&GA};VY$U>IsPk&qB?#58hhV6ufR* zg~2;a9z7p!c*OW<;Q+G0z${n~#jj=j2Q-^-ioOTbsHy~UwFsN49}+5Iv;q~v(*2&n z4Wbyqk>idIL}Zb_yYNohw$PqA&DXD4E}yrsCYd*${zO)N2=QP-W^>M@dSbs8~)feN!n3$g9&qr$Tf>!#^Yn6EHVE0 z&8BL?&A?y5nqV*%SauMUMvte6GU_uC2b6`LH>uE&5I?U1`W203Z`PP6p$R_6KQ@LP zsu@=>P>D&lOB!;nLfuvp3ey3HIUu2X`(KHbi7a<>pyW@@bu}dHbG_vrySW0fEq~u} z-y#+cPLq~o-dy&mS(xejsBCI4oKYwT9;ZbP!O4@~fSX3Y9$_Pl0_{xgX_@L4UIMZO zFmtsVN?QAfF7mOSKFbv-4HKn2<0#|;@=oIy_91TQ*bf26O?b8hh}V(y*zT=H_X}B<^t{_h_D}`lDurg0Jb00{Jx2qu-bFI#y-ZJ4`HSKKqve05!%(m)kKR zTd~*nhU{PDn>%sxhmXh8BI<%?h~D&$@VF0CbHAfj z+Ma4&m@#l{6zOhb27gRS_pU8I<`S7s+utW5^9y3cME=5k9D}IXSF-8bh3s7>OPL_N zoi1Q=-0d?2^2p`_R&z%vW~O0U7!i+$ij`2AeLhjV*EENfNlzj|ep8T)_+%z)hzh{e z@Zd|(=H!@~7xk>MG=}n2YL7Mez{um=afmTMuH4L6w*mbSw2drsR=L${DyUj*)we&- za296BOeK#I9p)J+&H-W4?dR`FgnI+dKuc-5~(Gxh2Ak;GZm zY6Ixk*yPebCtN6++yPMjn=HRt?ghyhbn|M-!x#4#&Im}Tt@)gFMi-Gd@6{FQ1D#Gi z+T)x|fwDp2>3%;Z7Ogb&L|4p6yt}d3M$*|k_hY^)Wr?<#<<*4MjHYX}0@Dw?4CLC^ zP73)o{AN4ycVKR-DNwUa4Dq&HAMb=W@Gfs;B^n_BzLL!09) zgcr|y+spe(jMs(p`Zx<=lsH==E^6MYO_D&ZQ8QE^j(?t9o~Xrcm+j*#^zY+kq$U2n z$MQg#YJn$88K`w_Mx!0Bah8x)!OxXi*xD)an)5{s{#hO%S){ z0QhJ)>v+rmBkC&HqWZqBjf#K;NGmC=lEaW9tspHipmcY~pdcXKUD7#pr_$YB0|E{( zgyhh?XY}`f-VZQ$?%n5{eRiz1wghaAXn%L3Xx+nI#e9-DK02)LVlgeY*2*UrtG;;0 z>Vw!k-yx~$kad@0GzI&Sd8peR+OP&v5?ObaC$d~xcnlXf7n80lCk?6qP_M=kUU%aJ z%nHEe!#wE;pN6d>cu(q{B3ti4HRpkT$JN@25!J+x6j$FzYLx*T+zbH_F$V{C%H@#S zaFBl~VzZjbO=<&RA^^HJvWsWt*>@lr128H;oSGMtUXTN{NLAPBGtKhpZV7o1NjZVM z#_6PBl_tI6UHuLT>o70%;Z0Zi4{hz%VhXULJ70X7(3a;0*nv9J?9k29K&g4F>2ADi zfTW>k7dWJaAgzGk8A~ZPH1n4Pf`bH~VW8P5=Yb2fI))spp^uLhqz`M=XKqeGculSt 
ztnC;+=kYgM)WyY`-Owb+;gFjq0;gK~9nZzfy`!}NK+%leb&c``q^npmB!%s>cAy3X z0Le9g&j|YCtwmx9qOV@z;(oYTVz(0N!1lOHp}dsY<*KRKnQyR=&eV_CHXh;xIXjEIXXHKo|&1zgnxOfd%*?L;O5i}WgBs{-Ct=#XS z);Et+V+cvjb3~f+x;9RH{qdFd!ZLty4KiC+4eEEh$_0}vFMbE+bNm%uAx!TgZU+0s zHYn*m;43kXymDLbWxE3Jr^`89d)znBAqnBLiVYoFg$*3}TR#**MP{N%N&zwF z>CRwTV%{eT#0Roh^FcaF7Sf(a;YP^xTu&3|d4*{FDD)ErC}ldpYDL9E%CHe+1sean z714dM=Rn^0)8RUKI=pK3sSNyJcDwmq9+)&>R}5=&3kEyF1xOF_szZ@{Oc zQG>)|V=~M8V1z_=2t9pv1w~~Td_5FP7V;a?)QwR}f_H$pIEIgb%Vq0lB6(#cChD7^ zrmlK=G?Ml>yz;692OsDFlMGi+_{TI=TzP2JtLf1`1eR#$uDSMkjd5pS1Bn`M%h)p= zN`dqw%^EDXdVxN(sS>?QLHr^6O6<7cc1Dut!F|=JTR;wZEfB24D4$sCv9%Gy6Efvh=i?f9j~q*(QJZYAQ$bkC?-C3`OA z$zJO2$0%2h>BPLvV5|pF9hpnVWR<2ukF&=;B9q83BUQ!(6RR~LXb&F7-MNZL?X4GN z{N@Dc$5%$CFFv^`BLSTJ-z~JY+KgDHr+K|f_6C(36Y}+ql35!)S)^rMUZHMudPxK5 zfU9TGY5ZvXgjoqAcvwa5k>jxsV8Ng|-QC-+&kJj}(j1}*i{dLF?)r4(2{PiB=f_3% zQ1-?L$KN(@O?LH;WgB6p^~L1j&z=T**tgP3^eV zkYy)KRCMYV_kmlV);<&KRV-(|!SP#JCr^+x18-7{O%=2Jn)5mRuzCj(7N()=&^78DFViFSoy;z`q<*kum z`_Znp6ypaT5|I2mYKvrBL|H_;>Z?5b1*>BVB2=!~GuR#akUFNIZJs?Qx3^t|_x9PC9=!); z){(qfMxO2ntkT+2cCJ~+ z55h|FYH<%@+Sa;dlk1hL;MQAZQ!NCz^CxGN=iC4$E1gO9Beu0atgI^}7;8|IcJk0A(;vQ-+ zIzXTFvT)4^SYd};baM~Dq$O+r%0~clt-ds=0msa;&5`JI9B@Z~aA91r$Ei`_UIHfB z0$7}}&|5W!M2mZ$8Bgr5Rl9_Jk>df8;QVHWzTV|%NYacds8wy9zqZK~lCz%deOl?e z@CH<7CpLZ*MlgA;+yvD=AcxR#L_wO8v^L?GrFw3WD&R{b^_F3!IT|< z3dJrJ6oh_RDMf@cMJw|x86QAA0MIut=h6Y4yd>#Hc4xml4_S4z13Ze2EkW>82e=kE zVb4RAe#DQM3H`Luz%R{JVXJrfENSBw7hR)mR~IrT%X8~TBsQ4x@+BHq^mz@)jWJjz z1i^$2BF2B&22rWH3GY~aP-MgUdp5wm+kueWa-6hM*vb3Qv|8KW4rtLl%G6ly=VM;s zL3h7P+%w4pl&iJLoH@*GEc>uz`ADTkwPE#@3G>{OD~hCGqQfl$gc;Wb_icb?s|)*@ zL2O4?4bt<_jh+TY$y%_LB}7FNQpsLWG(!sK{Iav90W+s~>q?v(Lk5=T%a;h>Xm~|~ zYZ$Ahdp^%xPGE`==(aB!k*0+tU;Hdy>g==Az4A8ql3B>f$e5GwUvkYej0zDx=~73` z$ZnaxzNK6ukg)K@{#oKvW{q>8&}HNcPVVPZSm*cs?Dul8;E_j&#pR`)0|um7@$yU6 zx|Yd`#pq<26-sJl%USy^U*4Qme@W{>_%w!8A6@bBL-+X`!T~RQ0AK|}`_zbT$_Ulny3>2tSBsK2 zXEE}?WSqj~>Fw!jnL_?j7rw5Nr>Nhz|9;>U6j1CsDsTRH@(&yO;W0$3>GiGdL(&)e zJZKd#2hOow`jVJ_9iu-d5spkqN0yRO_2! 
zVGvSI+haZ2K^~1scZ%VFC11zb5;SD3o&UD`4}mu(M|w-B$J14v#IR`s5N&=ZyZ%uL z07LO8{{chU0Nkeo3=Zi5uEuRb{|*Tswidy$K#QS#RyIr}x_K_~bjG7NAE;-oDyIPD zie~%9vpfgw-5@%Q=6tl*#3%neF-k$WthOJUW~kCVRL%HSN8^4W5Iz8BAg6#FkH^XA z{KYyOxt2Jovuv_p*wR+!`2aq^O5vDTHn(e+r@G^^uE z<{CD8ENKl3;dY1Cl3#=Z2PAJyVgec`f5*320h9$4JF{y6u=MyW0O_H%0oqx;%HkZ( zE48IZarLm+V1kY>QBR0tzIeZES(j2km*T3y>PrjgS~)1tS)Z?!KNJm~xD-H%xe$Sq zn~sG@{uKQOk&Jm2zXT1O3}h;;mIV@C1Vm4G?FlG&UTI}zeGH3~!o)Ajjz9Rw0q8GS zl2<-$9G2pXwk1*R(=s!;A*)>836e*nCM?Yrt$>PrV&ewL4^jkYK+0h5;T6T?h8M|8 z=i+{x9hMNMAc0%n!|a@C*fE+DWuy08+WLT=m9-__E?f93sc~B9h3zF*0s9XFVvG8- z8Cevorf*N0(nOxW(Mk>vXbDyBUL*|sLAwd#682K&P9o#miaji%X%X5WFk zI!V_)zE+Ut=7Ae=VWEdu+kqLPUIz{oxY=G=E+GQ58eSWKK&YXOq|gT|mU!a%bClH8c+wrRHt-5TWDVQI z&(s#Mnznh1C=p)Ls(Dn>U2fO$QZ&!TkedsO0vxBgLQhLeHn4-}py`kSukQ>m6>X@hTFs{;HFyBe+;UE-UqiQ7XZ!aUq z={S)}h>fYlIkrHRLGin4u3tBvikv>(PK@@nGe)%+n1TK+M!CsMEg%j`eW!yJvQ-)r)2Z>cAg~xn3b2tAZh=YI;7#0H7>d55su=7V(3I z=R>xzJiucDO#-RxDD#F$wA>W&@%n3=-eWXx_BDT^gpd3kjdhQ|+-}8l=kXuY4q#c{_O@w)M&K`cmD6BwT&qAa#xQ822^4 z)aE}M){%^41DGVY#cZcV!c_=m!5cxLPAf*F!A(rof?yPn7#x4?tH{Vnx z*sdE-S3H4J=z#Q~0hsV7o@A-)w3bC6l)~p{2KXqWmX|$9RZ4 z3-I~4`n3&XHgf0Z5MHX8!5e9-QxKDJu8%cQUx5R*`A4Jb^RCQa<*l*d2)4f}EN>wsxpM0vLW`rQl!#4V2CJk=qJ0j_+-*k1XJgEWh}3rr3Lg)!wougwf~wbzPc8lYYuJ>4WPlBs!7VsIyOm)$-a z_A0QSIfm^So==b-PCcIcJ|_IGw}hRJc>bnZxxdGm_v_4IBy9zRpGUAGYjqzJYa&{I zyoK6sO6{(xdd}WXR^p(eUHUSDJH?=CZ*-dJZ8 z1~{7Uq}<#eBD#Z}w*&i+EpB@}+kYvMGPE~P5ofHJmApb}|9)(h@XhJs;MBKz5Ap&L z6c2PSFYy;SkPj&({HeO#D(1(vE@M)!Q&?c+YFr^|qSuqj>?Zd9&dz2$Cq66Y|EB&aYZ6 zZ(PUrTx`$hZ7q0*K1ttecYFpMPj(NF z`&&?TQKtAM;?5uxkUDwSea_1`rz407UKl)q55}2FG~a}6nR1CBO!@ZMT#Y6((?eE` zvua&YTb$fZ{^|LkMZ2$G_UirPn9ArJY<*skWqbCBu6gkdXdy_+6o0Mf0&07xd?j>a z0{R1q5;i`5T@`hX5|Hk#x`87cR*25;uy(De0k<^xOJT^ zk7aO*ttH+$yy2dD50=*;`7ZdZcrJh(l2)^^gI z^%vzb25tDK?OT)8vEcTW{`K9f3lV-BC?aWqd!>z{=aeGu;JiSm5%BQD39kU{4C z8{D~e8`;S4hI7W_RJqiz`3&YUUdV&JfzF-p__QDb&vVNnhC-Pz^o$|B<9?=srvoip6fkDo1MoHhFoo~!p@ zOxvyvH&9WrN3qqZe|&MIEp&;zzE0IZk*+`!jem@6K~jBuALSfvCI=`^+gg0hoX-Z?4sCg!c8FRIl3NxW 
zlFOOQ{r(_dVMfALq9jcngHUxa?zw+k%Dqq4K1es3*rxs|XVj9Ev9cqzH+d25?z~(| zcuQhP&gMD=SLD>&sgtP;19ScXe8TSxTl^**F7LIN-A){W87^7Fr8fx#U#wn2JDTt; z`Wqy9o+eg$A96e?HaZgews)5Ci+K2VIdRA4#aZxA*Gq$hIR|sPOdB`dnq~XXN_KKg zj1h<}+WwxV3ZIRcLAmS_yiT;jIFET2#7=K&!VR)2MBqPFG2bI9hD>EE^>f}t~b-oKOY zMpewpDIG^H7y7}XlDqlG!kjbJrTT6>Onbjxd3S629{el`RW~i%{-g&#Z%|oZ<->qtTze2Xel1MjyE5-GjJUr&KYF%|8|=unF54~F$|I_NV9%J1LwXmGU;m{S|HIb=)yE)J{hOWC=w2TYS@YLy#+6a53bH#R&cMp{W)W7vV37E zKUqY6ZHj-e%3!| zUfL(Vb@H*&cGc%rQZj z_=KsEH?-lsM=yhBgax<``@SMSy$o6F7iO<6fV`U?Y#oX*)vAb!wqxk=HkKg_|vXdMT7>iV{>Bw-};-q$i^pwkvqRU3z6-$?U|m&~Zo|74x%lFemx9 zQ>bXL@BANV%A2|XgDexR_|*t4Ik}|a%e}Yxr+UWMn&Q@F1o1RY#FNb&zpF-X)xVj3 z-!~H5$ck8#{cSt-P$(w77(cRf*;F$p()MUaPI-Or>PDY4oa3R*(qBJ4m=N+K zQJnSFk@i3#xJOoLygW&V^gD`LQQLYhaQydY2{YWF*Z;kOr)pGr zU5>N(w*hW}dJVBA&1n8$SAMuuFEXZedA=u$hI%-UHpQ}kVJmGo4e1h+U|p;q^F79b zR&NM=`k{WIx3c>5c#`;AL2=Tbt@G)*xqb2T-vd)Z#E0hfjg}w|hlGWF6~!#X^PJX^ zeXY{n@U@+8ldn$qh2QuC^KXKR$zSv9y=?Qfi}}>TKhAYCEtDNI<7a}oia|0PASNF{SD{SS0v+aJ~G5+M}q&vu-UZY>uU6+ zRO=dM13bH#EOhOZdv4#(&8Yb~)6l2??Ltt>?ygmdQY~xa?+Cgbgr%~)8r#RR0CL`# zr!%bYn-fC3O%hxb(6td2WIgc-huSflM7P(^4(uL538tG{7;b*35wqSb!sBugf04wH zJi430KEd;`obon3OUXC1WjUPBj~rR`tV5K&tLT~HKqQx9Frmtj-&MNsjAERW%7%k7 zx|eHVB&^x*GbCfayQ{dlZ7QI=NTu?Tom{_pHfVx9z!g=%3xnphtYm?PATCS?2a@wDssVf9ezz>y}8~ znA(@To5wf)d{9Hs@RzZpLW>!4dMZg=7Am8$u(MZ{f5o%d2A4TewbXP(Dy}wh@W{^@ z-B#5l^PD18!ryJU`awurvanOk5^hbSgefMDrP5pcSqFv`xE=+V=!liXlxqlS^a!#X zdN?kxg67_->NXvTesXL870|8G@MhiJlJPB(Rkx3?xcz)_afJHh+M8c26QiCnL=FrJ z`@`az90(KYu%jO}Hy*iix3>?Do0g-&3k=Pshzf8y$maObXt+d=o*{^(yY)CyuH9y% zt^ACJm(LCe=xZ27(%aVaN=!f5seIG$VL@f~_XA0(F&@>c;N|8$&58Ar+S^4Hw>?Q& z_MFWgf7O2f&=miDe|BN1J+oB7V!S$4x6R_a>7+^ur@A!Woc?rFkr{E10Zy8}Uekct zZ!vo|?T>=n>gX?=>4^~$6&I09~%mW$Wj8qy7sas3&8UFj|DLmVGuRdTz5 z6RkejI409-`C?(S+7ds)gVN#%bzX@ahDAmDC3PHn@$@ixlP64ExWjNv4wBzHI1Mt3 z3>|A)W>oO3lFYnL=Zsy#Y)p+5X*V_TZeJErDM%vtJDP4!NU%sE_3ZT5w_nqri|)o> zYEV^3GO>NEsxUH(9y|>{S-4b>kjO7AG^om@#M&6WODamHmQ5nqmEvkVxEYR@R|2N1 zTpZNX6tD^#mpEzg!(<11g>vbHLyz 
zMo%PY$X1S!+*d!HIlbKNU$DM@!D;)0N)reFY_;h6J+|r#U!`IC+!65V^X;G*a}s{c zIuhu1)=(!cr(>)U6z3i%sDXczzSQSRrr(=i0|u1T2x&o@T-&c+?LBVuTwrgU=)4ay zP*RzoY6#_rSDL9+Rp~DY_8xbxl86xPwYW|^VzSFMJDx=`_o`nvGl|#V``!%onRFn6(JId>RH6fG1-!4qrKvjZ|3kj2?)$R|2A@udCROR|p)mf@VQkYEn3rkh5d;zb!Jo3O;V^_-0 z)B(o&@W$G(P$7DtMCHmm8p!D|A`mQXQ0(`o0hly;1fM(aYA}<};PbEK1e;0K$t?{> zgM4-*kVZoYd?4ES{Dj1byrCU#feThJ`Ex*5IXW=;L2rZbsvcg-W8o0Y#S*t_?dPYR z*)-GjzxSG}qbIsvC3Rs36&J2FapTtm$Y2D~G(Be#R7E$5w z?2^%zxrt{Vj(3kn|7DxuMfAdX_ELI8nRkOv1@!UnBkf@EgVa}*`snFyAQU9k(L#ti zYDrbMHg6b59nt5O%Q7)O7MM9*)1If;FM9Nfd72<2MkLs`F*~bpXjZIJI&CAeJ|S36 z*ce1tf^r6CxAi&*JiA?vr*l?V-B!PfYA&I*y0Q|?AS9RP)E*^I(2gc`S@iO7YdWbz z=6c0yAv#vf2tk>xOqUFGotIC}i8~B07$&`le-*sV#V7WfE$M)>Wb;&c_2i``%v(NP zXe(;u=Xlf0qmQedOKLS6J`P~5QHRO$wSv-Gf-8Go1=J3Io|YfDPWjR$r3~1$ zCWP%&g_rBB^Q_Vr?fFk&@PhjdHH^T#+;Yd|b=<hP z%f{29pWDghr(PMf{&VA?6SOb&?p@jrP*j7HPCKptY30&IeH~`#Duu;Ng?Zh5+)zmR z`zaZn<;FsK3o)#-X(q1p@E+7=m4+YMy;D`8Q$PX!66Gjn9V}!`0ck{&Yd?<`B$675 z&t&MPr8oU`LUwO{cSW0!ZQ7rlzJT_Ii4H!eqO0Oz0}Q)2k4Gqt+oUvC8-s1mZZ120dD?-ywUCL@bj-A8N^5yW zy*}z&Xp0Dyyga%TZ=67WKUAhXRb~bUQ&o!g;lu&Th>Dbh`KxQ@;FnkS5UCKLbos%d z_xq~~`=EWY9i474_3sethAw#nTo^Ah)C_A- z+eg^_t;Iwb#C;(nUZ_&Zxg&Y+RKT%%TR#bsGDY!0@Fc0e8+3Ood&yct)2Y|S4Y|8ug9kS}==(30_-A=?i` z7nb)+7`IgmBKmgdj6WHpg}V1DnRduh8fI9mnk|FoDF!b?kd%$tyty)(>`;9v(-zSI zW?&4Wl$FLjUit=`&Hs&~!iP{Mt;8fd^|Ml}u$;bHUfQ1PLusvC z@VOUaZ|1s)o_LNY-^Uy`Aqj2g`o=JSX1#ysBYVZ`XE}EV$}eNy`Bxvo8}t_U^^UVN;tx#Gq{g)|0+NxV4*<17r_K|@;;U@Hgn+HwUQFsVgu z>(Wq**OcBD8a8In!i?Xl!$+ohJfNv-Ai}k<@?d9W2UpH&KY{I^z=l&#iii7Z6u*z0 zmYqIUwji61Wmd5YudJe!HWUOQQa=gst;@nYgPqBB(BIoSny@ zfma=##Cd0@O0jh6lQ32Gis)nPZExN|X+0M;9;63ah;R|9D6WGXHW|S!B`q*dy?Egp z4*elzxhsgu-s9O^XSsf+?r+d&S&}1hMP3zw+AetjxxBIq* zitRdmxQ!~l{<+ao6$f&`WK%C3x^%M&B5WPp6nEkeeDbC(f8GRWk2lNob(Y2c5_OF& z@d_a-EEh5urvuftTHiyNVB%<{5*W^=E$iXVZHdDUYPBhwPd6|VqfQ{AnCc9cohnBq z+?23RDi59-`}tmX*(nvfWDy?p6ENdBY35JZy+jQr4@|<*0mBQT1@eFEz;bu;8Ko3K5si?n`IK_%gG0RCC?NF-g=G)Scb&8tHG~QGnVnvMHCI31Et83%$MBQ5CAzVY0f_ri)m5)s_{Zg!mf827 
z91RKKy{LY%CzKgJudNU0=+#(I{aloajyN_g02>Pg+V3NXkM%REDz@C{AK}OM)#jw# zF~@F4E`h<$i_IF1N3J0-UrXRh%P2e>H$X}UuEk{!O$Ayl@WxVNF`quv%z@nAXtowQ zjWxwzBMPE5s$f3ruE6mJgFMIhe@MC3wIaSYz3lVTR#3m#Im zp|7zIq_-DTPC&)ckd-rz@`#)V%9~x4bOYLc_iv?RV2Q2cRfj=J)+`o+4X0dl>J=zO}an1 ze7*dP;9WOJIgggFsC{>Qw6OO{V$L||R5`F`nCc{ENi2N$d-q5siN-t1%_myU-Qt?} zTP=*OS+q4 zGpN94a)P;C7R&(=^4o()lX{+(-R?Of&p_!&GHd(dyWy_dln%UlDsbTeTgN+c_Zh*3 zWVtM=KLQEw)l(a}B1Fz9^#~CMx5-Slr+yb%E{W`7I&-r-dTZbO;+-((%j>U;IN;Py z;n*qa(J*{6=spv|+`ltXp({m^YU|b#pj-Lqa`ckt{Bi>7CG_k7ef6?@_BYfao%-i9 z5!wzY1n9kDuqKaoLZWJ!5p`;vJN2`_YIk~c!8c57o}Somf`njSSovMclqtr*@@{w$ z*;ws8bN(QT!-(hHZVI2*MiptYlSsHWHzXAl2V!L*5V!El!8M*}&?4mQWbtA17_Ssp#DT@~H_&z2Gt{fSPFbk1? z9K4iUYQlRyt^IU|F|yasDAt`fVy@uZv?8tw2W$w&q&e4)yn_mdvguf!cadCJ}VT%Xz2=h)0@BRQN z4G3kV*y|5w>$b?UJ0M@GEfm5WcrMK<21FmECD0o5&5_p+K@;n7G7Od{xIRJ{7NS%} z#pw*Up%grpb)Lsz1;j{7s08;xP}^gwI-a?;qs-B|xZ?vn7tW^-`6lFpLnMd%>9%h6 z?hI^e@~lp5r?Y3-VQL}rKR;z(>VAB&j84&+v~;-%LIB*^k?JjJ^dc^c)`g0g{RjsA zu$n$x^`nyDjBE;M^Z)3~{sZ_tqLYPL6~-g101*S=rZxb_%zl#p8+y(E`hBWZIMzhXNzkaVBehwUguo1}j4 za*7Hu^ z3YF83$}6is3>vz2c*3>!(S|R6{z7I9%lb0<;}-T_rf45f?GyWc?sy$YB_2#X+Y8GD z`M?1u)rff)CwD-$ncO0&gx37efk}rG90=tN5Y>5pd*{(-|4dzo+@%vdT37-isDQF@ z(=Xd^XbN_1Y7*J!F;8_Q5 zzkT*E@)Q`5ty!WPU;Mg53mfHH-To-^zfU+x#umbh=if2k5~!FGia>LhQ*TFO+b}=K zH>iMe-zNB;5863XkmiE_BMw3(nR z$OTY6tVSH33r?M--OtaJwDmWI#=Q{ahBeU#qOdC-x+HR+*2Wt4ZJhqHpT7OpQ%Q!! 
zT0De?F<(KQYaOPBeOK zr>OhDqS*-_Ec!jLLJ!~)J33S|AwmWkSM0untSB=bqIC{lh{w8z0wiBdPIIR#wRXZ_ z$2$ty@qRjUZb*EQ4CZ896Ai`@(R}yb4c`Ej%V3848Q+XQjkiMrDH==+W)&_Za?^nb zkH}d@Ut=|sf`Kpb1$=9ZvqVew$pAo%d;xI2r)(X>Ho$o7hNY84S12Z(XuZIarEMFx z9RQKc!Q>Y4Hb`T}YY*GAIhMBktKpZV z7ON|=zz=(otk)V;de4VS(#jk<@P6WtzP9|o4aQ&zyn-*AkRezfz%3mEOM^Ku0hX!M z+O|0xlVu2;K;e22f=KYMO@m=Qi?#O=Evf8Sn;tC=iWbfc0{JbVSiaDxNzwtr$Ah2~ zuKV?Scb2@;Uw+~z`0tpmu2M5x7RO*0(k^nTLhl1Bs4ws|dMz#l_32c4!&l9kj|5_0 zXn~=ie?3O?b~i5AmVA`SE!YK^#ggH^@f!8j&Vjor)WK`pGVl4=_~?RYEp zMI-2){Civ_zSeBZ_Tw_vD}00U!A*OTGPd=_TyEh_6&jIKfgiUpOsvaJ`by*6*6vZ` zj;4<$iTIdStUDgHvj;fGYqwN-1%uQ5*E?!lqXh0z1$Z!UC*r6aW!TR7sO+ZOfX+zn ztB$%52LX1W`NfEkiWy;L03Hb31si+bAc+li)K0&%jKCg1GS53pbvOT8%cC75dayEo z^DMA3$8eA+uavN%oM>Dv_nbc=LMEnJrzd~<&b$@FK5n?XecXfBd8$xOPh&N$B^CR; z`eRvl8enlShw$${);V9i2ljET`z054SGmM4`9i2F)$IbHS_&Xne=_Z4MV6lyx)dd^ zkb2}k@+rN)(X#RRhFP-{YaDf@obZ3=Zk$q~K2mP>I?L^Au)!*FA(Imb=nd6)MNP!2 zJ)1qu0~<$wb>=kd4Yl>&IYNk+bIk?3A-CWpiT5;J#KRQw2rg>!el<-z$4qRT7tAm6$GaZN&*qwV779QFc1STZ;7ucqVY5e=nPA=eZ@| z+Fa$BfooG>bY4QaNXjuUexj=TaiE*ICcLJ&yLY%>60t+Vo`=1_ zB!(J+ft@22+qSN&hb*gZu}k*Y8#2wL$@$f5yvBGOU+mY1^Bv9<(gzAztwCn+U9SDN zQ)BBScAUq1V$s|QB(8iv>m#^IRR4+1!60nLTv%E7(lgo$&yd5qkAr+!FIl%zz| z_T>>_Nk@xqyqM1JsVse9O7+c0?HDk_=&vh#iH@lQ{^WqRDjxWTzfm6T9LvMNNl~MQ z>672VD&nTFrRerV|IR4M7tJ4BZ_alwU6?dHXZ0ZAw>ySZwT!@}YaXjkc4ZG{ynXv| zQ0AY}Rv!Rgd`R|C3KLts65La{Yz$Q}*m%47yfq@eR6rkHA_0}X8hv#Qp?eYda#^Y}`Ztrldu}9s5 z*GnL}a@u+QPSrrt1nBl}BoBhz5xZooVIbv_t9!XAjuO)maOEA8*&%RolZK~)7O~-` z16Rpv0HO74e^n7MR8QUuv;)!#G-yuu?3+6CGY_@zO%JV8-Nl}jOmLsw`R|#zeOE)O z=ZDiz%+$<&qFoL@d@nL^JfjG19uBX7L5Buur^g!h;^C@V-(F~FBjq)avt2jVlP01Y zt=3)I3zaOYpt+$Rdh;?K|MM&Rqh)Swwp?~)hlzbU1UFn*xX-yJUh*;W6rFBZOfzAW zVUEp%z;`};buw8WCw%1FuzV3YmuoGfYvX_VBV21vEje&}R=~JAQ50wt{rzfhDtvnZ zs>){4ChE$Jv@Zg8SF6;795Im~JVFSPP(`WiV#Z)8MM+f*LG|;cr@|=;$}_V`LN8T? 
zk~Xeizb!_6r)nl?q6HNz{ekg+j|?PM*7RL7z(z)t*Y>q~&n=R|wM$i9<=S}yT3x-8 zt)T_Q_5DKT%@uR*YNt{#zM{rgpNlhITi;sE0AH3@*zz{F0raB40rj0%!>08duyNJ= zU)}jo&2zH-pf}WUQ$Z!q)uFk#&O3?7pcUgLEea~rnNh1*31?m2+;j-g3W-y^7w?Hm-dnsf6 zr6WQZ`=E@8vR0$Dqk`YFL^w!EKAll96fPr1wt-p1KU7-Kw{`}a{6dcs0rQxB>>&nT3 zpl>yEX%W47(rPNzwjuw0=HI`5544blyw=*OgJ@pGE~xe9Lgbm3Sw(Un14$C?ixW8< zF;p3{(q`M(``o9{a5bTx3&JdyKe6bmMYA27!oK}0vfi@Ow$fgMPM_;kw!uZaT&Vx2E z)*-^(I{pJH$Zy$@qN%0{0m?5hk5`N9BbhI^P{}G|zfQ;I|GSZ2@j>FIYx^Y_CB?2N zEyHd$7)5PgcPOhRuikff9X=4BA_11c4hyo6!ULNaC&OWwpUJG!;z0JyOgGGE)M zMWLj~T6EPqzF^kqSw(PUNk zJ%NhgJI#NuqHvU?Q@CekPjcNmm6=CTyJCwYpCIQ{ufADEb%NX}_PDg$d5>ZXv)QxL zLCY0#?Hb;+^{ z+VvRK?^`RENOGr`PTG2BGPBd6uY64|Dm~@DeMK84Iot*P3DQ)(>y?bcIMwV9qi1dI z_ApBobmW9|3J&3iCn71!k0;=HMxkRnf5>f270Zl}YrKz6tI~}CRLh`d;Z{}2dN(Mm zhba!Yne92?j8|(T3gr1k^kO*vyqyUPkIu|A<=qF>WUL2$uO(@Ab;4uWo`&QRsc_Ys zjAF%IQvw0KCWRPKgj^A*ln`r9^q73Q<6(3lCPmolND}^~oUswmLGq~m=EH50)EoIwh z3S=bVO8t~K_uo2*;7w5V&-UV0$MTQPNBlORIduo6_r*$i54GMVLAz!V=Xm6XZF=g? zru`&)fJx2fN<}|=dUZjEjG9Oc=(b=y)Ani@OxUISEK3Ge69ameYVQIRr9i?}y!|)& z@7&#{dpJ}_>SbFCF}2*TilOJ>OGp-8KDfw2wb&;l>%OCwwaw6BfQB0lexgkE{hb97 z6?w&oyx*(>&NMOTs<~Y7nbJ(5Hv7{jDjW@yg>srZGCBNgA+c_=)2!0)e1jgBolO94 zaX%9v-Pa|VqZ%)V)B#jLrmbN{Q398?Yh*W9Mk*m zO@)204s0;FxZ#M)0AZ~V@%=+LkJ&*L~HM>*z zOwzQ+OEOjWAK*%HjybYWySucb2}8Vv}tb^$qr|QX~G;pZ?0>`f5q9VH>y^!L*OTE2-bj_yadF zcu7`PIT1(z4aw;Ah~=exQM&iL%1se?i!u*>5AKcRvvQrHq?+6X*9_Tp5OGxPus*8C z4%t=funNd9tOFHnS00jjm+om+lgZZfeJRf7A?Ee|vT*Yu8*#qAkv|2$ocj(wckN?1TPPjNY`Z5ErJgLo~Tl-)rU%0E?D|sd2<)VENJ&!9_hZMN?+sX4b zrd@?J=|+`D_m9UPtp08$s&`>cIPcPCG`}&tTc?<3)&SMqa5723xLgN)7rida#p@@{ zgzRgxM9HbEWWjcd6LnPdi<^W zgF>%O+mDbk*_}Z8MMNopSe<$!^>C+dmtPt;4Ya3k%4Lhqx@U-15;{cnccynsUpyhiF2JiV43cc$;k{dX z7R|tH(d)t5W)p+TCq%DFHXT}D9~fJ=bt-nHl>2(Y46)a`qO`fVmo{ot*MJC)|NPVD z_ww2^%osN7U%ksLLWq0`)tEm&PPEJ2UpdYZO7J`?`?#Ur_Eeu&YcHkid+trQ!py>L zsDGC=?zh~Lm+?~Z0$kXVYYdTd|ZRiewD}>+r;G8e~xux>XgkUb6T(6%)g}psRnhYka^YW>;^=OZ?q%yP1iEtdPW~Ge1}uKWLA! 
z6JNKFdnJE=ZNp_dLv)zOjhlSnpvG<*48`A_w;a9|NokpA8Z1GJI>N&$Q5ZI1UUXZp zxU8=PCowd*v2dh*thJGc+g(J>jpuz?!Qi_FvADn!Vjei9nnS(M?TI}%n!$u1V3g7H z94;gHsguNAeYoWSV{)4G0BFTS9W-^hrfv84-$UwY3KT7N4%+9 z{rW!C$_Fyz31MI-V|ErhZ~dOAa?P?cM#`L?Q=2lfW<0N~n5hQ7{o|c+b)gxAH&`g_v9U|eweVE%{y{K*s%7{vl2>{U%EJNmmhY!`7hj60_@nep~SxMylYRRcFRy8@^QoeT_ z^4NGZBNZ2`^U~2i`rM>2Q;O<-evaeppicV|4Uu4nZdV(cCUceE`Zwc}OY2l`Gcl&h zJDzJLGa*&rg&P@Iz6?t5$gK9Cr*IyBB*JY&HE;N=DIlg=O5roW3D8jNr>kdwS5NSt zD%t{qO+vv(!j!b?ZQTuw^4jTIW62CIKKtoqlHcp2e~_wwB%hFwlhUY0m=iWpPc3cX zAJ(~^SJANboq9G z$3egC6~x~h-gx)WZ)lMs5qY2(R>BF>#)2i_dM@VXL~R`Zc9XK`DL@$H1X}i)wR2fZ zpDycJ15`)?>nSAk-#R)-lhMxYH_a^-2`0A~(;tnu8ti?3P)KZ=n~yIx_)E;9K;piN z-$?#vlNL&n2|tF76HIV|T{A0R#vdzp$bU{^lvf=) z?mB-js&o!b1ZMTmy`-?V4wpoN*u7z(I>~a0(vp-Zb-g$ui=5~V*jc+rTO_Eh@ix+# z13B+igzKw51BITTNE0UJYJ$HIeQD=IN-V4j*SJ)K@$jC#R@PuW5nr^YK&`nv9iP`7 zK;zvk0>&1UHrKiO75v_BwvRco0aDh2WvWE$zTWyT(Zk)+Siht?Iz2R$)xlvZJ9YcG zi(7E!^_;&wQBkiI>xyQ1@|yWA=|kbRX8n#M^^!G)&+7+3s@bPY%unqtnd7W*WN`}| za{Ufwi~J?mVe;FsPHvZ#ZKgK9r>_4*zR1UD1D~4wt{#_}E)pbmKLtPjptSk` ziJy3D#*1VVlo_CYfb9=U!r_<7Ap4}IgH2&akhCD$0+2BB@r0iM(F~^mdEC}nQ?m5z zaZZ9?_1IKCwy5MH@DI*>IbNe=K(p1=Egqp7p3TSDe4lT<(k@{3g4wgId&zg;dOKs* zi9Vj*^7Oi_2$TF3XXq+j9xt|pSsJ%EHT=1&>AEjT(R+yNVrqHR<~a430vv|8v}Uab zCB>)=i*H>0y)Hy5ZxEYWkY&7-u_A^<2yUK@cbt+7dADHQQpzl;`RS(7MR$+bc|V^j zrGjX|E$JWw?>-$isk{0Mxzmn6QdhJH1~-V#7jNM>o!+&;xC4MAERDMNJ$vaZc9mK# z_TB3;8(u}R=r^VGFt1(Zao!NLnrgT|YZjQ$`GvnDx+UZOd4`6?bkX1_8DNf1Mfdhg zbUfUbuW`K`3Ou4+e{*G$_g;)7pa1dn<3fxTC-rKvQ2DL-Z&HGKR#&cy1dWMkgwGv* ze$Df+@#gb6XfB!`;pYnRS-Kvk6@Sl?_MA@n>5Eu7BUDQWBP~sv4I^@K+z)$JBXO}- zzkNMAFslC5+QwC4@?k+~CXSc!>UH!dMG^Ir_a`UNq%N2RyPIjV>-NdR;W;S?JpstKkJ`38xPa==`7QH-2Cq+${9L4MSJn8Q}bx zH9H=PTDP5>@9Pu{sD|pKG;AX-$`~@Q;Y5>sG2xr-+q_VUggXUBMvnUY*9c=a(%8%K zNv6*6P)|l4Tb@W#+M7$#N;FnFNPJ-HSOyqq3J%xx$B75}j#q!RqfrDU4lVV0?tMXS z;vX~1q`ex~KD>d651GS*63PoCqDnSbCfZybqLH%59J+qv+ zHt5S$)99Ar@$ke7?du(ZvSQ1zg(#D^e*60MF()dJq!37KSMw%(39mx?B0*^-KTOAAi1WSZdJfmLIkJlCgF+h#pPyc5eh4+;LdOykgvS%Wfx 
zwqDEyoixW3UwQOb^$YrJe8w;0v}KZ6%l3zhvAswmJx1a^vsk6cVm{@%O)^MEE%1K6 zh?t1C$Q%ZL^ z9kXs*2}n^gB3%8;G3YR1eLX~CGvDn#ArsXcv=o1QIGAsMBnvc5jE|BM)Y0dB$xYIc zW$iwGJl0;LtzuodthK|=eWRoT<-B5q`(1;Vx!v{$@f|P?8u8Rf8uP>k#wM7#pw7-~E*y8pe$1Okv^ytXyHJW5aSe z{`}A@BS!%XtbPM)rme0x?JMTGN+NEh5yMd!6VtNhd3v};c_N;xZg2kY@V`&u-jR*B z?FJDP!ygIQLvt;UT#Z7opOzSx*^Y%XXlX`!ycu1{em+W)(zYyJNE3us<@Kev=5_6x z&CfO;TkXznM&qWFwl8UR-=cwBE9nOkd8oo>rxwQsedc!h^IIpvvAlc9XTW+lT$jq? z`f_S(rtwSK39%n1T-m<($IDLzzlHqJtyp8TxZ7_qXWX7CN51IsVU2c`(W-qLkU1GF z1=KN;!Vms#I!Hkh^tZ%Y3SJDbzpElJB4x&v(4BZFzgL)NqmOw-;BNTInQZ02HU>Q} z{YgQuWLAAl`HkhO=DXp+wu#RYGcb9Q>WKQIeISFd()%V1HpU%0en2*--0Cu#`9L(e z+;uin)@`hnBCs@FewzOP? zO}w$t*mzFr;q8CFo_cM;V)9KUGi|l1A%fYW(v(+ha(*OBT~k=T8^Or8ojHMV%AvV4 zpngPmvy3)|qOH%6=K|npHS8&%8Tua+F1&h0VI_=Y4yqa^7Qq*UqjHx`ywp(WibJjG z(f@>PCU8zdA|K#!FTIa6Uks(Sq9lNN*jdoaY=^ds+|Kb~X2j;_tqh7y8g(YwS@LJs zsr$BLkHVz?XG!!#V-cag+#ukRI3=a5t>p8hxTDMDnhqgP6$|BhdIO0GKSIBZHuc&! z4MdxCr*<{~I@ZEgP_U{Bvo9C_J`zB~CDxsGHgD5`)bD?G zhl{d`sHV+_fViJtWx4qT5%09yX0ajizeD>N_ZxzeNpX0pd;j_;4r((7e~11 z`DDcZ1UgK*A(JsWLHp_Lr_4NOzk`xwWn=8p(dGr(^mU$$rPJpuNG0dHe>WfTyijpv zl6S40_{4WsK%WgctA3~K?C<6;+~V6{RLu4V1_d)71fhb$Eoa;u z?S_u&_t{^bXC-(n=M5fwg_C5jL6b6!B$yblb4+T5#>#~7tt5%GKtGkmGF*h-eo6F zm`LwJMa!1z{{O6D9zAEqWBHtdWg)N!A-#)T@(C8c(@2_;))ayEI38_?o8hSV1de1Y z)@gOCl9{X)*~?>!WGcrJ)JmCW$@y}g_AjR!l$@9wXo_C1c2+toII1(gq8yUbm9;oN z6p@E;Z6GP7iJ=k1D+}P17ykpX=;5^pU>zV2B3QReY4?^~G=hX^e3~5+7e&+9&4!@= zOQ*m)B8K1L8P0~~R1FAO^W5LzsFIYQV}7+vUax6#thZD_-w$q|tm0JBVu(k3pMH>N zJCe-kPu-X>o2)t(Hruj4GUc3YpjKDMNYKA6<}Rlt7rAj|Iq%Dujj?`WZgH#{3}}J< z_@`w17x$~3|D3cRl9rCX$=UN4+=>rlZo}hpOjtx^rWwklRR2(0G-6m5fw!lljPLM5rM4=&a! 
z-qNHfOr$rnk5k(Fy1=~QpAkl3W}mBc_ME=J^X0n*D%T+Hl3g0Tcf(1z&Lwtv0D(+i z9OLgQ>R=8T+5J9Akp9VVza*P0tBR|#iW#bVI#SwRQrb{RRP@Q%7t7dhF~UsOiR6w@ zYF~9UGB7hTP>;;-%!`*)R^nD3pHt81sHDlS}g zv-9BU-u=W9;Fii*5qR6DF}BiS@JrEyP0Q~lAB$5u0jdOXVUaR5_0~9ADZzE z4<_oVOb6fEZWW%I{_5BgGOeogLv(X+QrXvI^}8<<8P<5HngTQw zJ!HLpmnokao6`jxGnGboG}1mkYp@g&4u;rFl<)m<-Q&XK>@2OQRz^)`hOCS;S#vsz z%Ish(L;D+yQ;xCaH*W&t3@wMF?Q+w(v6PLwuKN3Y-`3gCgQ3LtjE6+GkLI$x@6)-R zA0)eYIYn*Po}*oqc5P+(G;F=L{&s30aW?tja+24nwCs4+V{Yca+vMX)Meu0W`A?qq zUy0r;V?{H3zU)kk33~B#1zU?QnD|n=0Pmw9^5EcZu68sN4dPDj%1pW)*qhFWs(sQ_ znlWI>U)8FlX@v--avch%?wKVO+CTbH@PN}PX#L0IoSV0Aj=J~lN}HKHPIQY~qdj@C zgG&)@ZS)oY%EJc`7Ah|tVf)>$yUe?^gLcEusCw{ZdI(&J_tpqzYZI;eL&y)e(avWs zO~3o#NWHhT4BX3dFF*I*-AcY(5HKsXWUU_j9DmtA^Q+JJB{_$5e8wPp&R%?CDplKM ziKBu@Wv0SPlnRed(UQiBnC#Cu5=dR8A9v&#&3%_8!G2AgU0U|AO|u(2%k$Bb*((Ub z<*>Hx`;(@l)(TqgxmozW5e@#^gEaob`%0{pZ{0c}Ydnc{n0&9K{))iC;cD9MqG2cd?Bo2G?8hIEYgG{)1z4qA z{p8mN=-bfjS?IE@N26taKuI};jR9VzSZGCDl{?rp0By1EFGy&m9w@44#n zE>MjrJJbSuG#$;0{vO67fTO^k@X+eD}8JEZAL7Tb6AQ?q;07bt9QB$`TXUjLSAKp!L#M)@pI=9vqd_#4+-GPD zVJ%y7*NI|`C70-ORHpI33-^h!k~XDLqoC7hpEE!l5=G2S za@Tr?SA;S`;>67*I)_Ogy{yQpvXpn4h~js(*hw$wfDHJ38T z$aCLkwU{M@WKt9P;XTJk2<%%k1+qs@O!0Sjkgi;ZRNmwh)&4?f6+j~cbsxq{?CdTS zCo=_*RzBxyoX?7jxs=hfy5+J5AT&;0j7eYfc;_9LaRW1ZLfu(z)o1?Z$MJDpUOfA6 z-0vt4$EdI0a$FK&o+4bnk+o3rka{&?%XBdG9XhM*_Uy-9vz?h>O`LGI^VBkpUpzj6>4>tT%h9+!OXM%{&u9vM_$ zx3@!8ORM80s~MNbpW#vKqOC$Q2V7?mQ-kHBm-~CkEYAF1N)nBYq4xwlwza3^+hJ4C z>y|V`s53kyu53^iGdftreYK7t*h{|gU|zL0uBWbjpAf-tyf0SuU0Lj(E#OhAWZv+o z_0>0BS--u*lKc0T|NQi~@v|i7GF|Dv%p^|md?EPj@N%b671G+MAN4U6EMdQ#6I>Er=09k9*~XmFpIJIQJ&Zp2ST}G* zX~OYp`s%9BS$59uJ4&*CCt|)O^s)5sW4Xg!$0e?2Hx-l@b^|&81(?zShJ?12rO)&Yq8d*CF0ox>@KfuX=m+{X#9p zobw21t!lhqA+YUtAakzu_P zW306(*Tth>8{dfWB0~{6c1OJoawYgzT;@O&vD8b$7wrvH~loC}G%|nyw5}q_+JmjL`T5|D!#{23Qn)lJGkaYSQBViAj zu;_beHs`szLhm0jcuY7+5ABk8@6jjENxi#Vc&^g4M&{8t!-#|JY#D237{;4LV)hdB zX-X&sRi;t#!4UM5H~UD`3d(W$Ls!ImoWq{o5ic><@@Z-_}K&c)KP^?(ka8R!0ENq+8}oIa|!UDJacU 
z6xUnIg|RG=7z5_Aw>z+;+EXgFXRsVYN0Zsck=BpPx>cl`un0u0c_q8-&(3*QmUWYE zvrX;xCL3d&lnWbcB@=XwbA+NkF6nQ!%TUG5={H3o&i0} zH61OD9-^zlq4s0rxXp&&uO!CS8aEqG#FeLWxs@Jgia5fufTURJ5|7Sv)gKBPuF8p_ z1?#l~$P`-9@X3pkrfsg0AG$TWI2L!8Q)_%_O8raj(jc#ZyQ`3{HpE?GnHp&s)epjT$$*O%eK`}?$@LhHgr@N9TDaz453zU4p*A5C%kZbU>tr> z&q!w?aWnVBKNo8t0AJI3o{4E%Sgk(KsqC=mVxW20X`VkfW7%G zU7QDW1)4itnnt)eyanVdMjt&wsQr#~h|$9ZT?iqUa8lYggR4YwDjRyJXOc)1CNkfA zKCC3nj)@h()YD=<9{mxuc_V+DC|J7nU~)d^Hy}(k25=tPjJGln)O=nI`<&U zoqd>dlZ|~NK(#gVi>*81VD`V!rW9Hvq5KytO~*|1-*rbkIw|Hx_^gXt%`O*AZf`f>t@Edhk6^dBmOnJpRppi z%XSBQiyIM)Afd3ru0Q?xeNmUi`0pz>bJO#UFGexSvO1DDD|FJd-w*RNgi;-W!8_1` zLrb(h*6-374PyP*cBqrptBlHJWT zg`bxA?ir+;%dy+}F{Jn3m`^|VIO%zpR@N0tVdgbI%$`eTX0US%@6GlMqFt!liO|7wK1zq(xLuOs@ga@COkR{vn^@a` zD?D*Ku9hc;lqq{~@toWAeb<+>?}GRXZFRS{p=22 zyrF>lT~6K%AOYjmsuEQRDHa+-6h&t`wE7mSL=!*vNIbXtiwBQf6_IJ1$8BjqL$W(l zq_-Lb;ATgcz~PO4i7x#M$CDr0V1ZySm(teW%0-t&3Vcu2$yeLOmgf7bWg4ndg+5aT>qW5L*Jn&?y`Ln zIJFM51}Z4XwH62}!tQtrY~JF-LD{G1OB}CdS+uEtWyqjemEstB4}_-vAg?RN+9Z63 zx!`Iy`bNW(p5rwBMG-$0mlfjnm{r(Ok8>wc>}8PGu8Dj&e0{ooJNJK8&^Mj*Yps1d zX_!-Es=gFkCZdVs#p`>uEkSpNeLNVaz5dWxc`K%syQsW`Y!7TDss6jTb=EZcPcTh& zT$K?%AJH%>wMjPHRo*JA0lsKHjm`gvY2-M{J@evqRGq;hp6XX`QK*LepI3e*6Y>`O z@;TqiYPzlW)&h>a?jxL82><}Ox90~gvHHbjCScZ@oGq5k0$E?fWyJ@n0GM~)+VdnP zHV{Koq;{B-10mOOz=JE~z2;M4uzdJA!7VN#CtogBF4b|cY=q-a8m(Jxp-?65177}3 zDL}utKTCbsmUs2AYz}o8Y=QE4q;9#Hh*V=KTRT-?RU)aJ2 z?d{OJsBv!9#?zPgZG%J6xaf)$RGPjB2cqI{0S}FPj!T4}^6DtwHd0)h?H=Jcu%ijs zQLR3_#Z+{cnX%cMYxho#AJ+qKA3((!39Oz9P|}c>YTWNQ9~ssH;MH@Ii5yutL284Z zadL5%BCaoEJfpZZmS^T!Oc7+2%DiFk0lHAN1;M7?N%wfjP);gdrrM-==*BAjbhF(9 zP=Z5c8yHdZU26AUnL&n9~aY~4IJ=;&;H-)s?Z zoEvBSQ={VO%;m==p-zw&Urq3WZC4 z@;gh5gIvXjYIC;xA}V)JE3ceXt@5_;T%0<#Zwmme6ot1iwb*qmlunfRfXDp+>|ISmPk;C6OSR|i0pLthIS ztF(jWTl-~P4?3X$WsFBI3G49@65uOucbKtIwf+H4+!5I%%#^#4<+9Ux6iie5{{Lsr zbaviPvn>$+2Y43$Muc7!q3rkX7w&ibaWfH#d zFLoAt*YX+jEp5>>6Jw5kFjk6YXCCO`odS>V@8_L-sVIa3j>1lWt`ugxi;{qSw`Q$a zpc7_TQG?WK7muF}pmhXLNuSmb9``SMKXg52Q6-5Yt4vsx2|EV{;=FxY%h_5KhBd%V 
zziPP*nJc76Y7W27LY*@?2KAPD*ULaC$Ob1jen)Re*#(SRwW_2B7IFAE{wl;V;JHy! zo7xOTU1k@-|!OU}w1znJu@_823MJR-E)@wSF~ z^@t@UO8s^X)LW9j-r{L!L#_*QZ~*fal+Eo?J$RH1#D{PYNu^Y3PPs(GYf7v3nW-rU2g34=Iz|r^cU^|c0z?u4X}W(!lKT;<=XLc zgIvIPM{v`zACRX2dZrrE?8ewyY~FgSp z?nGmHrULDo2B}wsU0aOcCfv;QU%M9;rat1}wVx>L%9QiC$TTutGn!rI1Bk_~Lx3TI ze23YpUD#P0Y*4_f5Ke7zd$1qhR@LnhcrHjg?XB2;LEWzkfXAx#rwkir*MVYab1!Pt zUjCX!cqL8g|8@#HQG8rlSSk1E^lLS@>1H`wj;)ebxt~JrFZy0wDX09Tv7gA z!$i9049iY(`Suxakp;vCKdgm1nVMIWU%blbLuD@Mm+oXV&_r1doK}VJFC%^Qza`zz z{IK}CpQvtGQ5d>bF*>4T@tY|8;gkCdpx*ZrBN+#@R|{5zK%(3_E_hmA154t))~vAca|7N#QQR zFt38g0czDqGaCaCIS<-U-#CF}0JW9C!N)cJGHWUVU?%Ui4tW57`+&tQWNwY}*-AW7 z(Q~VtvBL><;u_@Ck@V#s*#C|NP|{MsMO=hod~K$vxc=s6duPF7(#^8PO*sS2?0^&d zvv-a4HhaClW-yP2gl-@GxFmh@dO#QLp4RI~Y3lXGB*6EfEW-+;GaCbL_EM@NHJJs* zo1d-;X;0evrqWJ%E_983gZN(Xv)2u9=5MEwE8g}!+ZXb^uOw94VZ8}G1X12F1Dih# z_;Rx_jF6bCe~00r-Xd_pQiZREHo3{Z6c(F zbj#{>8@<{NH}xA=|MxWDk}SZ}QkFfCPjfkXY8#|GKe1$Ax7<>k;_qE@F6$lcd;{Ct z_D! z&$#xb%m{hyiMguU_dU0}&}-+Tyn%g2j{1J)^I1_-HSI=v*v}$L5V}F>htu`N;6NS= zj1Vaa3Cm6~IRd#WyABV3gZ}9ESkNnygZXSZ5Y8ulOJs2yN}x%vn{z?wu&wf zye>1RF$xwx(#B0(syET;`%cTN67YIv-8`nuFpR? 
zmoM95{oIr5LjP+jbpJ<4mmpwL3M6hfN?^qvK1%_*(t8NnzAqj{%MMLoBn!`C$jaKoWI|)KV=5%Xzq=7(s{vUm${R>4S0(G zSOeq}z#2}8Uf+?XWIY5W(m)Ah(*b&F#iFHI8~KRfljEG~rLIS+t3~B;er&ArhAW9C zb;nKKMYVjZ(I$m@n7_*vjc9`5M*5#gMwekkZeLPbGNk(v-xGlF16&u<#Qbe}a zrtxpoREu;LyhM^I7Z|4~9Ly9T@j2t9h4|6^f4%0T&9}4u2g5>oTyI*nw_e4>p0E)7 zCv5CzMsDl$=PejB+1?M`Py#U;!Bt&4;QVv?{@eo(T2D;cr!N5 z)I$G{YkmYr)|jhlwJQSDN0tNoufZorv#;vlLvHzhLnSj*wM+CHy?V%j^hC?1>SYLX z-JkdW?@!otRcsUe@K-dDCb^5F`3-!*U+I;HE=i?J+=XHMy#QTFv6rSN8Z2cX7cA=bP#4o-N}g9SSx`117`DW6X$I?`+tK9Z z=N5md$tkJu_t+~au%VGp@CBh!M!jIVvI8ES_7Yp}_`8;Ht7*4p@eZd4RtQQ8CiqQ7 z1GotBm$fnl=z&2J5@sjmqjx+T2=*UnDjR4kMb8Cp9I8XP{?tbBklr~@`oDi%rtyEK zs4{YtTbKIF(bL)%0kE0;I`ct!7-n@ZWl`6*4}k$XP!;>c7eO7yeO;p$67ax#Gzh-g zA8jR&c{J>PXaOYN12y`TFC;>6ooA@b9+hA617Q7SZ_za`kx6q;eoK>U-R{QLa3@Gv|{F2PG1Nn zfq#-$ZcEaP{ixMlJ7XJ9%l7@s^RvBo%0uVA^Ua+SyIPPw53UWM|M$Jn@4X(IIo(5a zSm2cG&8p$T{zdr4&>7JIT`sFvMe>4@uAX`8EsILc=?iDJZK7}%&x#xd| zsNsx5?%RG!mH;EJk( zvGb5NGCGF^mYRihc>7V-Q4+si9^65xnH}h&FHCk;pAY{N@Lj{nn@L4uqN@2+uyS`} z4HOapGqNsNgA4?aaoC=cg4y{J=-6Qm89}M)rJ6<`*;Qe`{2kA4`v<*9ceb9S!MI`O z1L8{7eN)k02lMM!N3iY;(*O7M1P3<+=6Ih+m(MjDIGLazc2eeYRHq1zh2u4~+BS0{ zfivGhTx}>+EnuS?f6i{P7C(X5Lk0Vrf)iNwDambRQEj#L%-nTTA4a=@gR<9K;<=*b zx0UV1n=?Xg%3u*cO1AFiHqjT|c0*#R^mG~bzeeMOllQe}X80i(5Ixyd2)!ajz4 z;CSQLjs6@kie<@5;F|e;Q0>WMB~77Oa=J|T??08xd;Ur=$x0wKG7y-;64kDbo>I#o zTw!UkflHD&lenfcO5&2;CnNlyNYqb<56OGwLYK)1=tDL2pO;wB>RbAhK0jM>qoSM( zD7>4!HFuDz8dXV@p_1c>Jy7HL=OfE&ZiH@}6q=MtdaHD7qNEJrS7q5IHafZ{sUFtl zL2-vTzBXn^pf;t|$epb+ZzF(zXsCpK+;a~ZSR>8!LL8}DWnX2tThqsNz2ET`VTRUa z^cYBhP~({k5PB~u5FR@pCTv(z^^G)tVNr4LY&r?6KP-Mn3{<9noa4Yaqtj$Wvt!a| zBq+1MLr_b-70y1@{NwuYU4N!s?(NU(ALOY(PNG)M(L$tBG&(t;*-aoS6WV?JU8X7N zd7vk@6iU#+;U*Y|zbD&$J~mzYW*DjbkUQ)qcxpX5)pRniyA&1N!&Dew%%I#LBC)?Q zdGx_AV_mQxL{d;VC^yW+o^|FOwA8=uvY7F)w3Y#7DvE=TC``{NIwCe6XbLS@LG^I06>k2PR ztKu10-IKhfxs0*V3XpSc{IMA)+Dcnqy5ChVpPla6&Pjgq@fD5vG|E~+;pC7k)Z;r0 z-V`L&C+`p`%w31h&bAW=Na@1(oT7155zuc#Nnw@#%XC_PrtfQMciWZ6reTQj`!Rq2 
zOYX3VfDLYoS&P-U37e7G-vRW;LtnEv&$A+;#jL~!Q9(Tqg+sgp$-=`<1YNM5V^&F@ z=6hd>QTDK1ixcaV5A@vuJ8~|q4VrxqyonC7LB#iSS8@$9o{Sq_`};?2Y4=X?g-I(( zOUv&aF8!2_n6fN8U15Xvh0ly`d`Ks`{akAW@3iGpjHd|>kg2NOP8A?~k??)ziO@*RJlmB z11DbS!b|=X$al%7m^s^}XCM^mL3M<6>%83Ys=RnLaMSnKoSe-EN9XgW&7HiLLHr8E zREGSlQZ!DJrqFl1lS=+mJd^8bbxtDcaH4GYXtpqIg!*|`xlLZKK6$*%4t z9cu1CJ4}v5J3G)Z6(yG66p;+=tBxD!Xsv<_OAZcALDvLQnZl3AQarD%hAIP#&SG>R1A<@X1Suq#&(0&4}u#G z???Z5r_#kxc(h;|0eAiacb1M#=-d+1Q)dct35V|{5+NHkrQhTT)e~R&9a{P1(O^rS z5$COr@!3H#Y=;i!?B5rW%ofKN4*7qOvK1&Pt^&M&Ntph1O*7JNTo!Z3qYp)ahlv!u ztx2GI*b3DHi%Pn}AFqw-pG?|`G{;Ot1C>r-xFh)5gUu_Ay7Rz<*U_%mAn3fZJeXG! z7x?qHctLAl$WOlS3(ey^sZ<~B&+Ep-k{%eC=n}FXyQ~S@=x=R@>l(T$lTJlikCXiQ z=sv$0ak5mgwT8?eF*xl0lT;(aPhc13xe5HIJZEFCGqGmE7PE8_-(~ z-3|IXB(iG&n-6T`@&M7Ko2bbpqM`gQ&a}fs*wF%0U~1KEb-p4BtNogzJHW{bsW^a@X@{#?*l z2)C`0@3B{u?(8`;P3nx-8SCK_MA6n5bsfFUE9`69korKe8dyMXyDmI@Kv#Rk!_$fE1=2qOnQeZcYWRxCk> z8DBm(-xhvjKE#(_j^|ZIXWBLDTx&R>I7eQ!$4{{^()C5-;N$=2m5H>Wk_X8_+-0e^ zl9~y01CGb4ty4lN$f-0~KCDzwUZY$T5qn0lE$oX3RdVDSgLGdS>gK! 
zbP<8FS(g<}k!_bgC~YBZXKi08d1ow34(bzFkS~3#cB69sc=&4h&R&rX+ zMLXB1fmaD~V@tr_5^f!2F@Ro^DAaH)n=p>NrpH{T+J-*=b zWKsp$nLr*r#htIaiJc3Nq2oOYWHy&9(Zhr=I9y~Uh--(nq>Mw z>}lcNnEoePBv&7$;0K5*f$)hnN@BRK7LONxZLZ@{a~i$&nD{p$D77F=a2DnUzh0%L z6QjcE^JY)TwDED#P{E*FVCO5aZKNk>z&1zT1FiVoF3h3jMBo@Y%K?^Nqo8lC6eXU6 zKqBXzn^|Ae&Ce|Hk%hKQpRrv>MPqigKSWUAeZ57^NA)HtTz7nuK$M?pLf!S$bC1rW zkLD{X>ImHT-f$r)a78UdEW9I=FS5Auwk>kCMiY>{)tvmCOShkQC+mKkYJAM%Jdurn zHUb&1DXiWwUzgxdssA|tOOR;RER>FgoPj(vqy-CG37NH>>`x+@DLK1P-i(f;2N6tY ztutXGjv6lHpnH3}OuX-v1gmvx)7S#%`X7`U+6{#U|9Z^ zQPiFi24-6?JF)?q?*0H76jw!o(VpbLCl3sdnI@53k5oASag; z4)DMtSGg)@Y%P0A!&~gte3tEbQ6^qZ72uNnXLc5)d z+(%Y)-P?RvVyns>jqAvUlP~7O4WUZ(t^WLQr3A{;LANgqPu_N}x;d4hd>S6+Y;X;= z9gCGOE$H&FzV8|5UHXgeMtlyi!cocw1l#&9w)tyiZKfsLXNdr)e+OW)X6{(_g5T@6 z*MtM($RvZU1KX6HzM*__N&W;x3M9dtWiS@_R>P=5Z;BBQK%^nMVKBl$VE8$CV)unJ;nVjU~=}dZD<6IFt2=vlkyGM zU()RrK)%Gwh9$n|u-#ku`OC5DdDd5j9W^wx`SvB=LF#rB{vD~{wa2_#p#8DZG6PyI z0E_z#F+Vbs0{HSb?+QzK=Jr=f47-{+OVV1kc7#DArk`04R`(hcuPz9f06Vh8u_*gH zju}#%^1tupDT-i18?wVSk0bbb95Su3h;|E3`~JD0m8($N

Z@dt3FV+Ctm@{GDmJ!Bs(>tI{rNpCg-BYGFl|US+?9y1f3P)Sm%BiSA{A;`>FFQR1Ak z`{VBClK=TAKo?#($^>OCls#Ip^U&KGtn+w>1);+Z+HmC;jHlHbH{s209n(faD>tBb z?Q4W+Q707h2X7)%Y(&?ADi2QRK^rcBB4y}_s<>Z4+VCmvCpX@9cW4@?z6NxQHN-X2 zoiC#ELXHCMK-ZPmlfv^PB;)~A6M?#(T0|7~=z3s4kM(jcT&1f)@<1*D}# zrMsKKL_|P9KtMtXX_W3!Q4x@ip%Lk3s38XC{p>-XZ@kyJ&iUtD@A+NVdH(Vl_Fil6 zwbowi6Zhx7Evyy`lYjHHdFRVn!`CS+_C~16B`BKudigCiVIG$b^&tlpbEZ=wg0H3- zGcMF{%a=1=nW;M(uIrstmUKa4R^w8?skY4U*MzWAJx$fcjzs(JV!wYr)p)PnwnmW4 z^dFq$YM`k!<%Av7HyDce>Ri#Q?e&_;RZHGq`jd+;C(y|B4moPDTG2N1L7F?o=hNNK ztnu4Og~jwzQE@+?Gen&kv%@(C&CdmTMH3*>AQQ;pMmeE#~J8x?D1-P zae;MgTaVh`Hq$_5I%oZpV+9Psdj^e_c6)vY0-K)w@I95xy?y6s2vwJ&AyowfxnVE# z(3pmOipQHoZIU;&!nLoum}JIk=(zKwA@)|m+bMb@8qt)YPS(YJ=h+W(T)j9qR~Dk6 z;>&AOQe&G+8rNJ6T?1;Zvn{?-@2bBut3j2gQrD}I8d%$C@vTrtjlmwBVw3hcLcd9Nv@K33tCZfZ@@h;`H6*K@%_Ik{dj>DNlm&@k1vPZ( zioQO#%=m#(j*qOuPmAkN@nS4g7c$2p*)mJ2JbQzc_cb5Z@3fsMl@#y#vF&m6?{pA{ z=6&%`B0MG=sKd7jjpdJS?x)Bje7f@~UWIsux-ymSikL+C=e7Ppa>DpEOoG6KkG}+k zJG%FoQQ%^3i(YzP;1XuthUn)1nqu92>(`(NtM*VRS0lx2k&JLE%~ zPKCNDezj{Qbbs(FAP4@Q@gfa9D7#|=bbxdZ&^l;OH2PKP%)10Rqon94bMB>;H8ROZ zK8vlTxFVO#r%c^y^afA$~^HJ#_McVUM$_BnGXSq*d!cFZ&CG}5z}U2*lZJfw2jfh zuN&!pm;G@i%JdXbis-g&T{L^`o^Jk?wCfwZgEKQwL-^CED4zYO=?kSy`uUnl>bJtY z*tm36i@YmL_LriI-U1P)EgeW8kGO-2kj#L9iQM)rH|SD!m)J|`sUR=)oj1VaM4bHo zI*Ig%7lVX_@RWe@_?Bb*VOj3{2>U9 zHYE*JLnLhN53{F4rD8{Dl-02x+&zZ8N!S8OI?Erp`K`I?nSHD0kHuMZH*K#Nv;=OcITRHLeCfv#m-$XroIMj@n<(lL4CEHCy*RJC=on=` z7qOo#ZR!?*B}+3fB5+GOs5ew-dx10OA=8JHD1y3WNvvE@?*>#*QByN$^Pv?l_MdJR zEZ@@cEp!bW+j#29F>?D5gTeXp$BcyDc0!r;Io`bm41~tRtGWPFLN!0#aPqb~U*oIb zNa#uklfbc$a1i9hdgb(mdzOiO6U0r2dCDFef%|iqBaD|9jEFY6_VVLS8moI8z2~1Z zuR)MmTciG@K~z(;O?sqIe#qnTcKKxe7I~p(sjh;diwAtai#>Z@2gkgH|2GMPyI#9J z$GTEZxQNsAVD~gxud7i8HBj}&#po!1_KvsqK9&yTGqAIGAdmK)C1`BT&LA?3o_KNW zgh$hm0?+7LvXKHRfzlvN)tYLfFctS%g#m{jXnVl`{9cJsU_t1(%-R)KPfvPsrzAmF3`$QW#{F8?AM!s9*)v=#z?a z0dfJ%4d)ynh8?OAUh-C6WAH+T)k{qqA1CtkvN15pGJ3}9dRTQju0XwcgtlOWCVv=5 zwzaoG_ieQn$tg__DTnB+=L*w@6{imy=0j?9%KKT2JHGcRp*Z~B*>@h64A 
z*qt0diWP0ht*y&!tc&Of@0+R->IiYnV64ol^VW&Ly_-6lJ|01t$Jr(?mRUi}`aRJ0 z{EC_mwOf9je2O@`iU(IFirM-tGT{R#LBjLHiB6PFU)h=1| zXOfbc0Ow;!uvu<4eRN>^UCQy}mBEl~MMYF~{|(Fj;M;T8G*rQSw-7;SEkt&2$TT!P z5h&!ETsSY>kUX*y{-)>4-EOpeiXnbVTqov}UDj{fS=t-DLlzXVCZ~WF0W3Omff|fu z`}dO~h9@5p zFtk({!eH#2O+=GYdNO27b32__5>A3)_(Ynj0w`n`1D^C}XTSO4*yEg}zq?cnc5spP@}fFU>B#vbzJ@I`g9-N7J6EABJS-xIl=r#V(26B z*9(KL`@0&{EIbS+FoY$qEs2YCqK$6Nr|@o1ZRUwXePmn2D>Gt`9?H2DLB*_S4Wx$c z?LO)IlnZQk6~2j~pc2&_Y`z&AT;bv5rj49#OnjS{ZTz7K1Uatdn8Ofnp6L`qoFb;7 ztL(p6H&JFy)$-)tOiyQ*B0hzxTRSfI2G#xq`t|RYX>XPNfm9-4maUg%$5#RC3H|Y! zXKAL>r4BU<`ygk%r|*$uHzFgbI++ckLK>Zkgh5=O(Wg7IAP>P@+nn=n#vBqwwrGt9e+Maq{ZA`%?= zrvmZG#}J*XJnb_;w*$eI4k1Gws@mNBM+mb_;z#8(y6o;V3qC3iqqpNT*}M4!8A*u- z?U%zCqbU0+8Cx4Xl_)=YdT*;UK!E$C(vey}N>E|F@&#hdRO?^SiRjfmAJR!NPtRux zmL>}J6tOw!9}Ar4dIXAd`sq0{LMwg4$Qv=%tUGm?AAjw$A;X-xDa;PkJ@7!iP8Av( zpNUSV?%z3Dxh4_&JMwwLHkZPnzW4HU?zTnZ=U>EyAoja-?6OTx-dct((}H(^Cc(^h ztdoD)-LT2${jm8ouzs0u`B`!T1Cq>Y9vL;r54VVsZJju|^je(e`_HPMD@Um{`ijgu@w7a{b)ym25KHROAOx11Xn0LB+4aDU8_C1X1zS;fWQ|%!BqD~N0*u%ay+k_Lx zPY*C`)qlEXiuM{;Ilq6a$GmwS&G*CkC2q{aI7RHLb|1S>6Rwo22BaoL>7ucLdsLuq zM@G*M{Nu%9CzsCkw2DZlU@E0icTi?FKTLS|WMV~6UWa`4AR;9lcVi*OK{P)#{YH=4 zWGH1#VXt3%l;YjIaK;uJ%UA`|c<-+u@ZQ3k`c%lhv*WN(@hQ?5)RQnAz;?csBE~^rUbRYd4~y-62i~H*ZDIe@ub8R*cmMd!yL!;V@dAlJRy~(( zFs>+Y%|F}vA;pp4{lB4NXG5RF^1vZ%1d#fq8bdX$>X_Yo0DeRAaDU*g*Z$;5CBF(0 zZ{%hxs8L<+3NcbJX&~E}?cKYbEODyGQwNAyJp(DC07G2E{m3#k>sw~p!aqKtp=3hC zEAH*PGanKl3bIanNk<^{nTDamUWkF%A-Gwy zOyA(Lss8A6y;RVhfrqmgWDc1%mdY>Sn&(tc*oQVPjJ)K2zaenYxr4j_ zL4hMaIuwg>cp8}8pH}X<)ezWfRU}g>wEr~1J$L|>?i^^hXZ93;j2(Pr*sNygU7v8S zi#Ele_8{r;ox8v2653dLUI6E;BPsj@B~`ukjn{NlmTWA)3(2`p!x&1@bvqfSg)?Do zAf$Hu^VgOr`mBJ;{r_823ZOz9tEzsyHnVi3?YmdP^Lt9Dh3~YKu4KzV9A6Tjbk9U! 
z>6}<=0^wZu=SR)|8bNyye=)a^cdOrf42j2S*4M82jNDO*TSYX^hxfam&n@}#)zEeJ zF@)YTu#a2sz>El3dYpUVxXP26b#Ffbq^WXn`gTzKu^S;L*oNPp zM}m#Ympx@JV$sg2pusE~(t@|%1yax;I)pOcEd2=tf%>xwGM{<3ipA*cOik440Y?aIli)h?J5FO@8P+nEoA$UEv&XZOoni&G% z*V;SzgA-=4t;usIp*XvkC~lBt4w{N2<2&HIvL5^-zl6rhxye3`a<`Oc=jy!(~7hEF=Dsda5J~a%BbSrD7_Dd znx)nU4;Eo8=z844sxN)S%QgHk+kX(MMM>+66S#B8kP^J2DJYt9zL!YSB7O!?eY=wZ z-445)9~s@Enib8Pmf2Wj(}VqP;zee1qmhc4|9*}?3!KyhKYsw2eoAz%BkgVav=dG& zMjiuqpQ}L43q<7Sjs>zPx_yjz`&5_#$_mGXZzn))q8>!pCsbcjPR(>luKZbPPY^y# z$Sy$5O0ta=EXAmU8@B+wC0Jl{TJFwxDB(;L1EXZ|w^YpbUFjKY{g)RE_wSAR3$L$~ zUa%WIX>$3&leKP%_!@1ssegOH&j9bg(x)2fC%@8iftQZldRm%t2(+_Z@1Vp!y3cP3 zicPITir8P6VM(Y$Fm=*-bOQ=O52MJHEu+@76$b~4$lqIJLG%51l;tryvwGS z82_p}>nIPCxkn)djNL~}mJV7OBiUDs%$d=$ZGGU~ z_Er;wrGaXG{p@j8v8wObULF>6o9L!;R!ZC)S#kGg)N(V(5S8ReXPU65?AEPX#C4Uv zb-s0){js|el6plcl^r8~?b0fGEW!>Vqy^}Gu|S#&sksC(Um@CW6uT4sf!FDU-gi8Y zZl1V|@av%R?jtq?d&T%&8Qg ziWEM)-S;LnrNRPWe^+=sY{XA-&O`Yr013i+B_ElTUc3v)figQN7=j4$hhZ`$BvXcJ z@FEa8#`AAqy;wA82bmy6N?KkR0+6pt^#cA*zM0kYkXsbf#-7c{RI0u1GEv$3bROZ>h<96leYgzc(cZQ-xxoV)F=ls_U#ZnoHOL$Mq;>j@8qJDdfZqz z!e{Btk&Fn+JgPu;9T5p=B8O=1?Vp%Nlf(S4pf(XG5=Uv4f*e;VzAq=8d7M^2m6KmP z?F|~9X%W9$!I>VCvE$iq=b=W=D6;unf%gy_r8w|iuHSN3gZfKILH)i`p+#C1+3MKl zg0}fcZv_Q4q%;l0?4`-6BS07eX#Gz0e0XprdCcZuBX7=qk^PBp_A=s2cw`aDAs2E6 zWkT4>BLAfSqPTorm+v5OY44E`<`YKwss!&fH1C=ZHn|N|k_VOTr;w){IsbV=;;j>jVz?N~#F<(1O!hM-r2nLQEaSUUmY7 zL~b2jmy7{BduF$r@SHk!ceWQ7cWaQp3dJel2KN5VlAy>&5#JJkRu>hM| zcD2H|^t&UqbHJ4ue)Qgb{OY;ux=TfSYEs2zAMXE5U6I37{Axa^pon1_cQZ9PZ~(3U z%Xda#{b%jUkn1RqUd7Mvl`C#ahw5E%m*&+SqMM#~W5{~Qk%hYPqHbyf;3bgZ4Q;Y$ zD|<16CwCFP@vamY?BUanZ8#th>+u;Z8Ngo1l<$M=swdPONyTlwVDLVX8V$kxm8*

eS_@DH&D(}?pWw-6lQ9Xa$7tLO>`_2cSkESLyHUA-{Y~gahfn`Of-DR)^ z7Ni~0AR%fs&gIgVc~C8xD!T_NHmmTP#?ZjUb+FbtSiWocVRsx1g3}IXgFk613JB3w zgDpL^d_0=E9X2@EwCWK4sE+6!Iiw}#+_?tt_@&X!1tpEBNZp-;pP9;)6~+@#)j#*Hctln`OF6$-3mat-4oR(iB;;N2H_ z4bIf25V*f!Ta=G2Y&1vYjrBqdx)|h^MG83m2in1d%FEu`IchR?gV4wmh?dn`)V>0p!;Gh-ES`uV=XIz`D5ZfCeG0KKf7 zZFx<@AqE{sq{KUf>k;Bd@vUnFVRxK-VlED6^4rIMX#SO5t0#wJHk0ev@a)o#o6v}T za-9y1y6${rSq5I=ux{kQ)2LaH$dcx$j7--m`YUaD+yJ+A_{Lf9j>M-wN%9h?x;Wew zzWh_|d>d@esk}zE;k$25&Hf{BxWnlCN~Q0(a9-5Pm8`u``6ldh`r|Mq0mX1+`+B}( zWWqRN0bWJ8A9z3oyU;}DJ-Gvs;PTXx;$8B~)OR5ae<7*pYLDlvLly0g`a-8E2P5T~GeKFjg zVfW+xB^IiElMz|f_k4%59D+ymzQ=A0Tig66Kr+t5ju|Y0{KE&l=7u`9;m0`Qj!z>v zQ=yaDVIj0EAt7XU6uGc_zCX+sb%%H?n~gp*)O>OBq=*UMj|lGyx~E+dWNPyHsK=w6 z_%8#!2|S;bM*%}{$mdI|y&Aa<7wn87Njg8!89>AEJ4@Op*s+&vP zp3xaOhqgz$9|#kwsano=2WQ&3u<0&66t2=>%2p?O`KdYewXYRG6=c^}pSEP!GOU9W zxU}1HQ5P@EmC(M>dSDT@Fd4T@#+QOZ=~kCzxCuUU!@vu`dfzJ75s71sFonM{s?u77 zi%DkqxazT`Z_xW0m3IoN$J|ybcwX0^tgJ6ywBlI>PQPG=Hc1c>Bb>Up@ps-yD`_{X zxgi#DW~rnaxlm;-3_kCp_)=lQhovSQmBrGo&mis2V+};LaP(neeu3>hRYfOCh9n;J z;tW}8m0kf<7?pz!mnp?b@h8%|<#o9TGQDnBsX4>dC$N&`aH!NHCw|8dmn%)z+CW-|Uuinn`QO0`^ zXC#ze#fqT}347R_<4z&RM9v-RVDGv;G~GoE&cAD8eOr`^cJC!vH4l9s!Wm@FuokhC z1n&mpL<)z}uHvwAUMN&Esef@CP%;0h%hHy)Iwg-)Dj6@snj8G#%$7OR14wL z^cW9Q!kgxH1d83}Gky+%#pA zt=mtrAWMISRJ}mPwh8f_dlj+YcxE`h$!xv7$!naX-q85ZcBBvt;nSaZvr7X@i<&r*~ZE8~^Gw zCZjX7r%v8cD(lT;S0!Oz0qeMdFp+?Q^gXStm$&FEOFSl(wTZc0n_fuHjx}cRjbWBo zt{h*Sf3%n*$H|k#k;c5V@zZi1C7fW4w+5{QGc-u>*yi=%KI*Bz2U(O(NPbu|#baq* z);Cg7OVPG9C4&7{`qhHcv_>VS@|nW2plZydzeaX>B*AW3thAe)W1sHk2uqqbjCOYV zH^=PKnM|VTF4bg;P%VF@Q%(fcDH2=SMsrSE<7HFh=yG`dEzuP(5*_eBsm;cFE`u{4 zI?O&2T|z0u$QCh2ns)a_)%Pp?Kws{Fk$1b%%2{vYvCWTRi*aGc?8r~Kx|VXthqOKi zJuek&9b-|^Hfgm=jDpuaj68t4yVIi^r*LDhRrM5VB)Y{xe3-0A&^aSwPcXelrZ)nd z#YR4s>56=9O|tQO&drtZw5X&_a3otR4q=R`EDnW8o$dEO?QiwnA80;ax18Q8!Ge47 zUqP+N?{Q5S=kTu~D|>yJuGoXLP6VVvqp3O;YSPFxmwhAT=XP1hC3JuOU=GF5#E{Dm zNy8BC$L&2zj@<|CeXxTIGFqHk`G#+nNah{G*$=r;``Spqp*86B;uCN_dXWQ;SVo0r 
z{Af6X@>}8`(oPa;7YShc`6|nWJ?d0=Ld>I;LtC#@T9iuAZ&I}>M z$^TJU7W$?apBHonG5gFsw3Bn`MerP-045WQ&;Kd;~4jLE77YlKc+b(@wMf0Y${0>H$NF*N@=Iw6e@G z_1sW)SLX<TzDED`-d0xP3y#XN9c48B)Kez4Vatrc$}!(l%Z(D^A8$DC0_Zgh}1 z9=#_bphK;n1lbh83V%dR1x4k*XAWd1gT}l3w_MZ>FYjiaT4SFja6 z9xC{5;%6S=2lzIsX5PT&-v#?IE&MePE6YCKjx@+t;*hmC)C-{L$o`dW=T z$;!HMI|cTozKS^2*O`)P3ttPL@EJ|?5zMtvP6rLTkj4p)LH(K}+(A%_glPZ^_V+{{ z;o^JYw4J)b?!*s|y&x+Gs^m+3P9CC=+x0wG^xbf-DOC(U4AB6sRn*)GTY~bKG&8IY z!HCj5eD<+d0?VCN%%Nov`+_)sM&NqmJ;Qa7`jj{*+U<**`tdZJ1lvLuAu^c3j880q$1}Xyk;PTJP zKswi1%le5P_I*BUZ6-JN&&SJCg4ZXtD(jl3-pCe|%#W+TjO1S(0Cln)!AcTuOoU{k zjNB#@jt}5=?X77QlI}peV0DPwt>dwOTV|@gp@;aOBeMNL1}!=2ip#a|(}W8+si{aG z#cPu+XGQ3*?C;RD(K?voI@8gT$Y(ExTnOzMstN+rsypKe`V?u~xe?+EO`JNfW)h3oSO#_3tFEyH$}! z+2pFwTjGxvdyYY=?W0OO1PwJSFpPu$t$La<2ir-e!&Nx|ASE@N6DXRSCDq0=^^XPl zDU7~=+9Ky?dh4ZJh&r6SpDndaQZAu=q%{;bOduT$r1ugDSeRAK4oVZ1pRHl=^}KD< z_2Z1{O&PD5vgNSijO`GrQy;7$MS z%%SmUu_D&t57Ws9;)!R6TuHpqQX40Ej`b>(GYp^W0-)>&q{c@gl{i_m&SwL=C1IM! zPoZZqKp7&hk)y5@c&UjbUsucXE0I-4*fCJ(cHlUR5vG6oaJfIZtDE6=%ay*>y0}tF z!X>vsTxA(3Er95mtJ=>H)((;WB6G+E8BW;VvN(#WtAQV0Poac2LcPR?k3Ze&yS*XcW+r>ij$EcZ> z=fbA-31~bMteCX^v5r33@9)K|SYG-QZO42qcjoSKI#g7qc+OI_q+M!b@NnQ@(zFzn zNaa`J=+lH)X_vnu^qwr^jLUr016OXs7`+xA^3ga`$8-M>J6=$Vo-$x5cbd-0Ms<|?nvfb*0z$AeWz?HZ zGeSqpB>wY@-rMD{Dq25Y0)ABcy4X6-gI*L%(_MF6 zrR7Lla$drC#1B5zI>%j4fDvt_g`3Dibp7kJH{fZZQFYzH9{)G)r?_t$0Vz^qI2E3K zMHQ8ZzUovcFODz|CU*PL261W5v^T4d?j@HKE>b$%R$Rszb{v(hq|WfbMcas#uF7S5!VMJaW^lX4r=exnDclb8g3)}c0!ZpY(-@`%7&pdcy3<9 z)p5a?|I63=R-N{#!)mRa*qa=ss~1`N6j8~zhLC6qbN5yT92Sg>rE5~ijJC*5^zVp@ zo;&c6giPdzsYP0CS8M4HX}VZ%Y#~A{{onzVr!?fXy6YxgOh4p&pkxu;ju0FVM8e3z z7Pv-s*s~n&$@Fr9`Jn=8ajuW#3C|=AU$qx-h#U1>2#`pDj+IKmHW;!Np}SH0sQ0#D z+jOFuzt;{kjv_1O1pSIN5;ARP?D_S~NGayo_gUuo!dSB?!|>J2p0aWABM!_OR`9x~ z1OZ+t8)M>^gAHUG@1Q0>9V*FN6;^Q_;`cFWeGRr_+nmdQjO-@t(A4;>l{0O0MDgD{7g*5Hpt%(QA^&n4>vAR06jqs{OP`ikqb17a)6hh_6DV_%6LlNY33 zC88kUX|u9i)1C<7S2z#0RxU0|3(U?NTR`TjKtg8V0H?h=i`iju1L5oyjp7rVoSRLo 
zEXmNxi16I}k0dhXIl8Yu=YeKJ_gr}!*^^OaQSv0;VER-Sf|ogw%i^|=jvxn?4cml1ht!-8Lf7!m9Q{&tcM?xzw_8=ehgVfk3 z-t3NbIM;(17}`@h0)?Xo0EfMKtSn&*enLC>Kd@XpmFUR$yv$1t(1r7qoMFOzc|#6( zGS*F#@8=Q?>qz!y$9(vw8>o@U=yc5w4#{f+1@)||-3T*Ba^e)KQYj-5KHfdyY8o{J^_`b{JSP+IBy4Q#NvW+dlk}YhcRJdB<~0m&(^~iXtY{Z z>D`IbJqS@zq=}PFP}=K4=Q%zm@U`u1_Z)*KLzIQJF(;+JMv~(%Fm~kGOY=XE6->s0 z`5Z=6R?2W=f5!bLi;*+H(d*2;l$U0a{=F>&X`>nr`tjL<=b4D=evEQD>rhk48$_1)zrrhR3oi)=E>$NC};tS8G?6=O@y9_+tQ zs&I6tDje~+H?NtDC8=r}-jR3q^}Jq)2?yW|LhHeAE1c95xGL0%mSjQ7-(h$|sd1IX zF@gWMtFGVm?#ktNUqwy$vF_Ye^E?63mcDhC2){!;p!IhSUVY|xL3ZqzDM#J4HYgWy z2UvCM*7=>cPxdmy{$#|F*GEYco#SV3Zy^v|>aE~8*xemJbBJL|UkF`eeh|x|VaT(y zg%o#z8GF4wLd24P#z>h=w)B3TkLdpEmU`436-TqvXWduTjWYS1TjB29J!*vy2ql*% z2?;ZXQ@Jf#pr~c1Aw!`DXjQ}4Cf;6MPP>T-V`7(DU1R)n4NKspJ~blmxuQO6rRw#mKHDb;SDdvXE#Vn%(QdENXh4LXB*e3Q93btGU z2%5B*8>E-GSvbiky`d#R4FWmG45WG$gb-(E4hNkiI$BNoxkGyNWt`9Z6>tFs$=U%+ z*OTRxV{+UiJPnXFCoDOS{%)ooVt^35g|axkWmJVUwj3mEM`;~||D<4j7B4=pPo7<2 z$HY9gu^yz%M(n5Z8+47m=7?~7m)aQ1JXJ42gi(5Fp^*~s_ai^QA;Fw*5GIW1X|ZvM z5M4RwbY|3I>%)?H2c35Lqqo^Y$BJ@&kI$}>Rc1ju__!g6%l7^{pEEQuZjk@0u2hAT zmYs^CoYRfkWrNyGr99{LW{NeeBYa0>s6dbad{vob;hs%?6%8h_(_r1%ooG=ZDASg< zO;6P1Qfd0!kK1bgBNI)5V;7iR6ZeHF=BqW@ga%1au{0?8 zjtszCXc9ypHg7f1v99$1vpZCSi7P2M-&l;Zn!GE{Rtf1o|UX0pLaw2kgb z?o%J3Wy^h$yv2FBY!rZZ;^@fikv~O$5MFc2xnQoPbDUxNhD>8*DF?*$X%ksvQl?{Q z;5pjX(YIb)?hzERhsX>as=33axCk*`OV=Lc(QvupyBH zIpHets=bvA0S&_jdfV5Y!0a+hx>u}Trs+e4^l>dauFz)T5K+R^MCm0%cQ!H}HdP&y zz?EuwbPcz7fA1#m)aCr_{DlR>iy9|P>%K{|Pf(MS)mo1|QG+kE>EuIEUMBdqZ{GKEQ? 
zUa?1HS(&B64167vAInjNSiB8hf6PN&4}Sf!zMTow`j+bu63ihL)n8v;D|l02&EUMR zZ4$7Q9lN@f2MX9^p~|#7$r16IU5t~iYiOK&kpcFM*q3PU7s#QF)p@{*+PdJxxKZRv zXCr}U(X+xSXAVSuDlK(z zzUyrcRi!7x2~{Paw&^5=nmsL=u%dwJDh90FNd^}6k&h<f@Oc{C}L(}MH7g@~!pY^IkA91s=3K&- zjaamui3oZo&%0ac<^&;uW3~4s)31CTKN0RBLc1bpivDv8BuS&2V zFuoC1f#5mr@1dE&AH59D0io-12eY;fDY|eAeNoKQv9dzok*Fkji#S(VJLb$md!kNjt(PiClW~|6#{l^Wt1zx$JymG4K?!(%#1f+utMa`O zJ^#jDK9RtPFKx!l$wTWD38{DJ1<+rH+jyW>Qgoe+;})wM=pA1WJM0EXcpRp%Q&25R z+b+3;dA(ey&h`L}>LEi!b&Y>sl65H7m!z5hYONmrOp=FZ^os&Df|2LfaVZq3F$@m@Jye4fNJ>1J1ovf>nL@ z1o@I=%ob82Bm=qu>K{N)Ky!w-MO?wvscqV#PHResdW1iWJ<W4RP96l4|a?M$T_Z(i|Vw6 zQNjp2GYNU-dV&;{-nNqp)s&`n^mtQmi{B4k963Y^>`C8Cf7V&l6A|d?XPEQ~$OqfY zGiu~h)kzn&&O00yz2OR|dIONnveDU~4_THkoKEw@(#pKqIipxqDl#S~E6<*wvYf=G%CtO_qCAsA=meJsHvj#Iqk@8OscsfzD0LP*lkNJ~@{FSmGHP;G zL0AQ#zbM7$i%IGC%iD}Z-bEd~EceaV8~2u5OC_2!PCAQ>-#?2+vW0^etmf6ETjcIW zf3p~)XfADljs)Q%!T{RkJe6WcD??M2{hJH@lx*d)MzfHgCc)XJrQO z!_nX-20|8o^iM5Qwt^)B4qk_sefv}cx_(v}P36yQaYnqFi}i4oTM zlZQvgI0p{>NYc#lek{zrgU9J@Q9AyQl|)uy^{DJG36 zZG0X#G#k%e%7Ti}$pIiw$)SfYE^Yb=xYp1CYPBZjDc-!penz^u5D44DXN3D>J^SKy zA;{VBjm0j_fL%YRMRWF8cC6Ci6iRr?#k8_^is2iU7-oR581{(B4|1Xh@FJso3SPyz zc>{Zy;)6D^y3;iO#W&A0MYLkyGMsFlOUz;yd@m~cPOiQv;n4>~HmzP_$@`#vFQ?(G zy^XI*z9FWI zxhX~E324)8U2>|bKIM#a?Gkh5jpm}k>+!%lJj)y?TD~5XtEp;;g=3SpLyt3^fHDw! 
zhE@j8UGj>l^K@?+t0rMo87I@68hf3Kz#v>oE9mM#WT)EhwY$p5C0>pPg?Rj?@5hIU z@6R^A!&s(Y@XtvoCDO;I&@Wik%(^K_LeR(ID0RIFQI5?ePIJ<63A6dc(UJ#!$R|hc zmcHrRbjJFQL>RlKFcvfFv9xkmR_Y0GX!(e|VK8p$WvuIf_ypwQeJtlwdIrmRwZV~N zlp_mU&2iXyr+TSgXB>s-@%m9zv1?9qvGeeS6~=m@?1@Go z_-DFmuBgqt%-7DfF{rl|-E4?ty?Mhmf-o2AsB_Iw_%L*J{ln-CTI1sAcETU|AFOEY z&@7#vH(auYk;}Yy^6Rb>iV=#W`(j7>cY!s?GzXe# zd3jYJ)2;Q~%~%lGAFw3-rHdIFOp4-C8{nA%VO;&FuR({|k&HaHsW47gg)m2f`p&z7 zY!+Ldd3(Mw(@g3+MEbK85`O!x5GHz|QLR8N8=seTA|c(~gO^_+P4=m$QHElXZA2Zl z0F?@f%K14E*6?=(_YYVfEj#E9s{8yN9gL`jY(&ak7yq3}FXoG=0gdweyjYh{5vM*W z3?h-(`Y4C3<%?m1A})!2(5q_uxhJ?g z%U2{7hV3q&SdZl@Tyz{%WsT_c8P#*IrnP{*&vd0{PB-_aIaWSi=N;31W*wNCi<<$X zMl>4KcA$?kY2L!4w_mp|Lg(S02Q97P;l|wEBzC0Tv|Uez3q#&~aNGGg83nrZp_9i> z&Ulz+_MS1Ei=Ux1RVn?B_d-k3B3XG-xr?@qOKF>QBd zMJNY-&&cz*ntI^%=wTh2r!{tO=N+TB%+r0!@Ko`h@8dNUZpzy!?}#A{2Nn1G7M^4J z)teR$s!4JgK_BX9$=p9%@R%6F(93i52PgN~0w6P^USG1MycJfde_OAO#eN0{>#H%Y zvv7wuDX_a`32&&{B!{Hj9fBvaFH3~{#KE&pxv#&NEPr~gNB@M}VTSTdGy*)A_pR-N z`VnutK=(xm_dyNx!ijI-z1F#{=}rBW849@@`u!;J6Gc}esj=Wp1=50i5^we-4+qPi zkXJn1+)ru}@W960>RM9`V=umR77kgQ{9@oQA7d46-|bbct^G+v(D~EhE+^80rR&3R zV{(LYOw{*uXu>HhGciz=d2?yzItX1VESQ{S2$FJL|fH#+Gm3z=G%-(z>+ zB5W18?F7KV7-Y^{T-+uP>~?b>WI4Oz5iRdnF7g%?r%7o>%i6SHAMmOfTe zozdw58~th2T=*Uc=jIG8n$uHxoSWLHG$t7C2%nYf^DBJ_x>^#AS8&wORwX>^2mxiP zf(?8~ygdYv3Bs{{yqupWp1QC{FUqnE9kGk9;2A}?jQ|$aw>|I+XD_AH`FK2MAXv8& zZQA;%)M8x?DcGZ4FHiP4Pt1VlYK64THnfZ3YzGWU4+y^ZM>OftXmzry{0ua*_VDU< z{m}VBHZPjnJP3mSV6C@SPI1Lz>~3!h)Z|d@w8quK{#N~7Q68#o&p(0q9V+=M=*h?9 zHHR&+n4(V|mcBOUu9XF~lo$iQN*Lj2^;oBHD<|SM7h@+Bs z*q2p)g26N&;?l6G^xpSBQ+jruYdBg*=jpe#8y$z8^g9gV*voL*!^mP^Y}F;ZLz~IW$2C9v2lNb5)`@Pa~Po?r3uO?=1G zqJH7SNGVnQpWe#0Px&gf32^B{bS61XcA${gHb3?^NUf%YDYw=2Y)Y-XxI7=bFmq`; z>!(*|wt?7id{eCANby0E{Zw1xc2AlRMjKvVOY6e{N{Y2zBmY&%aw_@e?DXJ0NfWi< z*k2a>X#YD?_})^Xg2}T316rao1Q>^bfl65i2YQdnvmzD?;v^kanIK-Q*Q~4{l;alg(hkh=Nuh*u-{A zWzCj)&??-xg7IE2UUv^}DG#uKSIC`8)uUb7u@sL+E~CRt8QJ}Noy?p;6&dXnOOUyJ zQUPvsyorLE>u!q3{TIHxasO@8!9P&)D>YIT%wEO=9ZIID&;QQKDuUMbRnFe06IaQF 
zCB2F7eC)DUc@m*U^C&^Z@n(|{|D2F?_hoZp0zXG7ut(BjNn-En?@(C*4u{5G0 zN|G{dvp6kAZC6yZ*5qeEXy<0J1<7WX1z-6_8f+KkX1NRP+SZERt^0I6&QxP$B$Cj< z#|5FC^}c4`)cTQY%et^E23w&#DruUb09}QD^PM)91-U%8dz53^&v17m+5DzA!K1#@ zDsX=WaJTE+)tDZiw06FTzQ}a+`{wU8?Cz$HjU6n$-O?C({D_%XwCf(Lr}F7MU3@Em zb87oWHt9TV?p@5no?#P&L@&0YnDh*mgHY)C%QG+TTH2?_xpZ!G{_T;&ctQ(##4|KH z=b6AOi2clI!%iQ0TE6y?#!0gQ)mN1EDn8{!wn;v&t5$sL;F;Lw=&@i5L!#6%me0LEeH}n$jm0R4d5piz^TJ+_p=8<1Q zr6Xf__E*7ivPf)c*eU(sS;>V7X^bzvkVW%}RZZQArnZS|gDRYb>A2`z=j~p~HC!}Z ziLUV4PCYR_&Gc4bwFNcwnLdNqHE-@zOwzuF`7@L#ZfsQ=F_q ziayp-V^G6^yG^1h7*0;@1Gz0uo4$Sq$4}DeolmO5Bi5I2-JT%V*<5{vm^xv7?}lHP z#4h2HgGIuC$x;{lnJPAM%vV`HyYP4b z^cCdJ%FjuTN~IXnveYK20=e;DL4SivowBnCpG@mb#th@hJ4FF7Y0z6)Od7(qG@W-> z`@=N|!&Q0W&J9kDumd5gL63HE_PR&idCq0#OQtWpBKwJdewDIEPJmJtH7LXSEM6_} zUaAuzl|wwMIc#6Nc;R8LLur0ArQw6)Vq_lsht zX}6!WEggLb!hT8;WIPoBtNjXf6R8bHGWO&z$>CPzURDLnGNJ3mZ3q(@o&ZSMc}BIe zluH*4*Vw)N!?UvdAC8r^`JuQ>;g3&~x@%%XsHH3pRAGh`p68983DxPTCnVZFE7n;h zC(ws=r^;XFM@d5IC3K&!-0gMUohQS2Sz+B>szMl9*iZJc$J}qj+&Gc-%Z@iV1R{~| z@8DemP9^(fU&4r(4eE+$RgTuGYbprJFydy%-5}y3*wcJrJ-<|gNdl*kx21^Le6B=VbBhUlhsQQ+}y;f~UW zyXn3VoZ2ZkP(W(FHRYQsO_%;|-6&1W%tmJmqJH~fa-7=ajb%#nkA=FPkySPZ-=2>O zTMF*lqdP9_tw&Ky2aU0N#~)v_NLU0>JFEB33xgF~1QwsI*20+IP>036F}m9}%J8G0 z>5G28e8Pf4X`>`8z2&y1yJe^}YgAydw!7;TH+bQ>oc3dnBFw<;v>#M{Ql983LladD|K8aj0_7KGwk#*rMxJ(uqJW$ zS=YGmEP3Ne{>Qeq8@rjwGQT{S>%)G^k|>`HBst@vVZX~5bJ2wSAj4WY=J!%xpHxfb zc)zRi8$e7ysZ`BO+_9tDMASAQJ`c$7SJvJsx-0LBA{R)%N$;L$cXB2Bn#B@r$m|-q zDKkl0w=VqOA{MJbcc*IE)88P_{=TL?{BrwB^KbA>s%lLOZ`)bzuQqaf$qH2E~$iF*kJGkJywVq0U@q@DX^lN8K2oPSE7sCcY^hZ0?K*6!i_mxVbr#+^`yhIy`>EZ z@+68>@42`La&ns6VP$^aHY=L@G2?9;s892>`nu)ISRgBW0e!O8~ zAb~L~JP>w8TAps^qWI4=}4zJlkP*fmc zYK2+lUJMfUI=yh{0v05ZRGiuIVgvfgyy3mDlm%I?$F>rrmDe{go~|cg%e~z7j)mX% ziaiTXY*z@&kj8>6{)Y*u0Qbmya|yv*SN}n$ zCRum5yILBuMI9V_HfMt=v;W*_c2mAdsOLECO53~EmyPRon7MNpzae1LrjO>zMb_2T z>j^|`S~oy>Rcpg+(^aveJX0NmK3vd=V5$B593QiGNK-OKjwtEYGBp^C&W-Gd=oA%U z!vqo5s59ij&m2(vHIWz8+mbW|nc3aU-xrp^bM!1p7*!tSdf3*B)KJPDgu%R~DHM+v?KOkM 
z9JGad{7;}dp)%|8V4{$@C5T_?ImvLdk++4j9K${i0+Xs7Ds*7(t>f^?u}No%Wi2N# zLamkDF4U7W)Q~DAAv7f%MZvZEaV$H0GO8^~4Q>~W>X_DzDVx{aA)!(a<_J3>Lh5ei zuZeOj^uS?|% z!xYNq>CIF2IzmAsEY0E)L!ZRUw-1^S`cp>Lkd)usiom5aHX=>u2if~^YY8!N*12~k z=H+?Ll<}c=_NcygO4Ng_oApyWrzUJ&4c2fHan*5fJ4Z}9a)sqSa1W=z&(t~$3~iRBAbb@zf5WhBoUDvy^7>Z% z;0?rRGk9J=AR+Z`Oboe+7E}KQ`6d9LOs!UE$o5+K#vR%l?D*uWC-%Hgp;kpxDIg+Wx`)`z`b>CN+qB(*)2zQJmgjY&_B(bwGB!+{;K1p$E1kk~ zfSQvD0Uu_||6S75FjYAEW+0ajbEjGF%I>&R=yKrDn|3<-rNl!5S4nBYl`EV1a0BNu zmllC+MZ@#$;12YcsMy;pvcC%$(xV-W1FMtWem#_rUf2xDaBbyFEzX#sN+~2LY8#*p zv~fzV&at2T@;OsIj)+Wu-q^wSa{-=P418HS^6yf>Lf?nBz8Hm*9p1iiA#+6^$6ok3 zpscfP?=YN^>103d{ho44{8!H&fi<~2+6Gwv^D-~S3g8`5r(bU(l#AOhLufg%yU8PO zT7QF7UH;hqr2yU@QA6*kV|VM;qiX@@9~UIfxduKGD0(v;OyD0Q3123|C@lmsHKG27 zmnqe+P|0G8TOFZ6Gu3asn#>$pdGZMWoA2Hn-qIl4)+e?e`5uUI-F z9o@7$UcCE=Wh&p*(}lbRszLU&I-cP`SRyUbfNsU9dHtK~y$nlIQtXA(*oo6_6(bQF zgOIw-3^ED|jdr|#YEt#QcPG3)m}zKTm}C%;LV5jX+kwhkCa3H|P4HFS)g#cxUTwNK z=etU`Y>XKfLMcqv$b6f0)nQAU>lJ-1Uk9uJo{Uz-uDT->-=$VRAAeiI8@E0`WSDgX zh`bj1`dpt&ta!dZ{c}=XWc)`=hFe(gQ3F0;zTXYj*LLq(kl1@w`waeAiD7W&N>3epV6p@FRPLi!0pTHKy!mc;7LW zX7p;Kq#U^ij#T8oJ7iGT`2K{OcM(wlOC!17;cpEa_NYYH?|XcRj>KVNJwcupXF-@k zG;P0a()ZSI!7*O3e1AFU74FUuv5knElh8qCz6}n=DgQuEY-^XDvM&cwzbD*BCFXx0 z@}#=3+xfL`9${9Ds57tkmA3ZLwO-tyJO5W@8on=7mwo;8LCEb_$fLap;}I@qoANKW zig&09hH#zwEP!nN8q~<{{9)5Kbi8qy_6Jrhg3Jvzk)8|$_QOUg#$t0KWsF$h>0<5V z`Qo>1XKN|Z#S?@ihVz5h!R1s>?#-JXdJ$4*vI{v zcdn0Favp-2jVhwnn;8GO0?i|3Us;x>j5Zhe z(~nzGtqE2s@~@d7>zDCCJ_$==05f_35i)$(l&-7#D@gyhG8&<2y!ZG zaZs7){vB^8oLNh&Q1iFnAHN<*B+#Yx#pt%PJC9kO{m1~L+g7h8CM2WD5WQgokkv=- z&HOLFQrF-Oy|NaUH;P!4CEabbB!$o*qD$ozoP9|qwVi&+o1x8(lJ(cAc&Q$ECPA>$ ztRsR7V_za-2KTxn2zt!XQY@bUzJWpb-9ucuCM%bz)eh~3)Af^{i7-#rOsTm_1C#j$ zV8vyRqJkxa#Gd-B{`%~U)MGkQ1GmbpyO+cc3rIBSHeuJb9_8$#7ig{LO^#;%sP2ROzo=@($1cI3mN4*|Qjrtcp}@ z4ds#i)9^IpM=7H*KYODrmBtRtaHd4HLzvZ52oIP6pbbBP0@M6&<2#X$JD|sR8uZtL!TWWweDugOLklRJFdaGE`@^$2XM&|I<6>JNGb!M=!7KF3y1s4RacE z)VM+4-oB2?AiIJq7P<;RZAOf9Ca2XxMTvNUWH?oN_K4~p(FAtcv;5Kd9B3|+!j!Pi 
z$#LxnFUHx`RKs)yyqdXD#0Z@mYn_;(*8FUwP-v2=28}gu`FV)fIz-~brRIHn2~zzB z9d*(k9n*<8kxy8o)Gvpd*cNp+7%PmKAlAGLgS^i% zm&sChYyg<2FZSivm+jx%tjxR^E=nv6CscFKF}&&Lsv}v!0gkW9gRQCin7` zlFtHv7gcz9e0#WQx^y%0_JRs|RKbFJW^z8>HB7k8sicD5ph{PQVYH*`H#1grW%(3Z z9@nHJojB)&UiLC77xzws#<0JBacY!m@5PQ@QyBPQmOnxPb0?=4E*Fm$;MhIP8W{oJ0DltccT!oe z3a%cx3;YUic^@F;3poomf6wuOfiuQY@2dhOUZ&A0svP(5OsL@7iOk4%nK*4_#yXJj zfvSsFQQ6txyYI-xNA^QKrW}JfY1Y)GT4)1UQElbSlc!bQLCVKzZntqmw-#=L#p5GP zeutu?Jg+Rnu5;dXIEp;amF? zFEPLX;WK7i4ZYTR1pRE);{B)oqc0NI*DMK;V1VqZ=~^@ly~z}uSv|!{{#n0$!%jXF z{*;3P?SVu~P)Fyh8bDsYHIb7S8rBmjLuw{W+Mbd}a6eZdB7jX?snhh0=Tx;K9KqH5 z2XcwRw)rGU?NlbPl(d~{xUHHU=MxSx6SF8f>WIwofDO5L^6;A`*iHKVvGL7koGqq_ z(NTO;1-Cyk_b(nU&z_RM=Y9^!ph4z~;_&isl{xLtNA970qw|LhYz4i|!YZ3p1t25Y zt`2{$)?$fyl&Duq(v?XuwAZl#u-&lA@-a2oa_ute3Vq8!-ox-pOCJEeTg z`X!Ic5*UMA<9hVn+TL@c^}^WKkxU$;ragrsWvf>k3$~{uSiYUXoyTKZUoN8yWi9S| ztjrpy-6RS_Kg?(`aUA}d?A6ZPN2uSIFn@+zsvsiu#p9&uoA_%_=$D=bX9qxGZ!q%( zQB8q-|Do=lJ1*;w3WrDI3ghHh%DuqK10Mf;_dC`<>(U#w3#jVgtys>LCRMN3>8;Ga z7bBQa*Y=(F0yGq*w4`)N|dX&EBNk=r^|Bd}p9#BG`t zAbyg0;{#}*OoX*w08m7wWQ>xzpDzqma)G%%!tU`;gZfA-_qCS_;LCuoPy~nqc%KNk z6uu=_9vc|jR+6Io$lmm^TJuS4jD&^eVzDoKG`4=O8BTFG>zDh+(*3#K4-f8qx=2!d z-l~#g(gOmDv(`T49r1fIu5{tOG!)!9bNT48Fnfxnjm)a7+Hc4BkLeS?`^%S zA%nO#!)}RY;kS`&lAth>uB6`}Yg+ML?7>@7Hi6HSI!mjfmg(On0;{6){bE2pu$rq= zN&AQek!o@stV(qv#Zcc;s6(q=2Ak`JUoU=U0rZ}dt5pRQgVQm+g8UIKDLDG0OwU1Z zN`LK_AMWC~iTYo?*Oh*mJa!38witN0BZAo&Fth({>zw2Ab8S^Ef`A6q%@$3#Cz4)x zKX?p1P5kU^Fl5sH&>=%FZ^)TJn)Jys8le?&r;?RwTX%fvV6=VQ&`7Rhm0fyK8NR;a zNWDHBwf58kt39=9q!V)#hU>JLXPRuZ1{GM zDH_mmpU%QP-oMg___dJ*Fy~Zx0fD&D+h*$j)f{OT>~tdpFc$tyB!}OiLMr&-Ch)%z z0j#|UdPSan_5Y{~Z$%-wZ>|AByfLg_=QnJVulv;cTB1sH$E06~c#IM?Tq zf2}1%m;3ZLC=U9+TO;=X-Kh*9Y2X(MnnAy!Kb-^rS&RN{4*pLwxLWJhyYl9Q4bx&@ z9X<-gp+G!6mj#>+MAysGRlvIauMW4vYL^?W7kydxBWtAqC=m#GTS6Y!Bdi|hhXqeQ zq?wXGw}X!g0JXC8vw%E=*!1ls8AcgP4I!o2|&2-Q?>3NpJpdlxLuhL*Sj-kIYkYevOajdeh8oNdBw86>0vc(){%)P@zho2)Fe5 z%#JZZow#7l1a!hosyPbTFCS3!vtDKIpt*DvuhO{+1XZLFfcP?9mw+?#07-P@f=50e 
zV7r~-x;<4+Fc-3v)guQN+{<<*Itnkz#Y9kDCpsw}Kv{}`cWujS@|jf6zc!HMk}bH1Pxdq^o3?8DV|4f)kELjQ9x zb8phQ&woyWYbwQBIR?>$T74sLZv_v4guoqu{r@kbdHa-@fNROJe-8bFT?6xd{55?s zMhVgDW1dl@;iNjY>ZcsF+)sWtu3A674O#5(lbjsiEPiaQfExVk4gh!JPDW{Af3ZHb zmpgv5I0E~$91mw@lV0RY`(d}CMhGVJio5u>P1c>_@^__WwU?8Qb9_W`oS)QKRHc@l SHz8mVhLo%Tr33{3J^MeD0DUC@ literal 0 HcmV?d00001 diff --git a/Libraries/oneDAL/daal4py_Getting_Started/License.txt b/Libraries/oneDAL/daal4py_Getting_Started/License.txt new file mode 100644 index 0000000000..a3ab05efce --- /dev/null +++ b/Libraries/oneDAL/daal4py_Getting_Started/License.txt @@ -0,0 +1,8 @@ +Copyright Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +© 2020 GitHub, Inc. 
\ No newline at end of file diff --git a/Libraries/oneDAL/daal4py_Getting_Started/README.md b/Libraries/oneDAL/daal4py_Getting_Started/README.md new file mode 100755 index 0000000000..8267be8bfb --- /dev/null +++ b/Libraries/oneDAL/daal4py_Getting_Started/README.md @@ -0,0 +1,149 @@ +# daal4py Getting Started +This Getting Started sample code shows how to do batch linear regression using the Python API package daal4py from oneDAL. It demonstrates how to use software products that can be found in the [Intel oneAPI Data Analytics Library](https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onedal.html) or the [Intel AI Analytics Toolkit powered by oneAPI](https://software.intel.com/content/www/us/en/develop/tools/oneapi/ai-analytics-toolkit.html). + +| Optimized for | Description +| :--- | :--- +| OS | 64-bit Linux: Ubuntu 18.04 or higher, 64-bit Windows 10, macOS 10.14 or higher +| Hardware | Intel Atom® Processors; Intel® Core™ Processor Family; Intel® Xeon® Processor Family; Intel® Xeon® Scalable Performance Processor Family +| Software | oneDAL Software Library, Python version 2.7 or >= 3.6, conda-build version >= 3, C++ compiler with C++11 support, Pickle, Pandas, NumPy +| What you will learn | basic oneDAL programming model for Intel CPU +| Time to complete | 5 minutes + +## Purpose + +daal4py is a simplified API to Intel® DAAL that allows for fast usage of the framework suited for Data Scientists or Machine Learning users. It is built to help provide an abstraction to Intel® DAAL for either direct usage or integration into one's own framework. + +In this sample you will run a batch Linear Regression model with oneDAL daal4py library memory objects. You will also learn how to train a model and save the information to a file. + +## Key Implementation Details +This Getting Started sample code is implemented for CPU using the Python language. 
The example assumes you have daal4py and scikit-learn installed inside a conda environment, similar to what is delivered with the installation of the Intel(R) Distribution for Python as part of the [oneAPI AI Analytics Toolkit powered by oneAPI](https://software.intel.com/en-us/oneapi/ai-kit). + +## License +This code sample is licensed under the MIT license. + +## Building daal4py for CPU + +oneAPI Data Analytics Library is ready for use once you finish the Intel AI Analytics Toolkit installation, and have run the post-installation script. + +You can refer to the oneAPI [main page](https://software.intel.com/en-us/oneapi) for toolkit installation, and the Toolkit [Getting Started Guide for Linux](https://software.intel.com/en-us/get-started-with-intel-oneapi-linux-get-started-with-the-intel-ai-analytics-toolkit) for post-installation steps and scripts. + +### Activate conda environment With Root Access + +Please follow the Getting Started Guide steps (above) to set up your oneAPI environment with the setvars.sh script. Then navigate in the Linux shell to your oneAPI installation path, typically `~/intel/inteloneapi`. The Intel Python environment will be active by default. However, if you activated another environment, you can return with the following command: + +#### On a Linux* System +``` +source activate base +``` + +### Activate conda environment Without Root Access (Optional) + +By default, the Intel AI Analytics Toolkit is installed in the inteloneapi folder, which requires root privileges to manage it. 
If you would like to bypass using root access to manage your conda environment, then you can clone your desired conda environment using the following command: + +#### On a Linux* System +``` +conda create --name user_base --clone base +``` + +Then activate your conda environment with the following command: + +``` +source activate user_base +``` + +### Install Jupyter Notebook + +Install Jupyter Notebook and the conda kernels extension in your conda environment: + +``` +conda install jupyter nb_conda_kernels +``` + +#### View in Jupyter Notebook + +_Note: This distributed execution cannot be launched from the jupyter notebook version, but you can still view inside the notebook to follow the included write-up and description._ + +Launch Jupyter Notebook in the directory housing the code example + +``` +jupyter notebook +``` +## Running the Sample + +### Running the Sample as a Jupyter Notebook + +Open the .ipynb file and run the cells in Jupyter Notebook using the "Run" button (see image) + +![Click the Run Button in the Jupyter Notebook](Jupyter_Run.jpg "Run Button on Jupyter Notebook") + +##### Expected Printed Output for Cells (with similar numbers): +``` +Here's our model: + + + NumberOfBetas: 14 + +NumberOfResponses: 1 + +InterceptFlag: False + +Beta: array( + [[ 0.00000000e+00 -1.05416344e-01 5.25259886e-02 4.26844883e-03 + 2.76607367e+00 -2.82517989e+00 5.49968304e+00 3.48833264e-03 + -8.73247684e-01 1.74005447e-01 -8.38917510e-03 -3.28044397e-01 + 1.58423529e-02 -4.57542900e-01]], + dtype=float64, shape=(1, 14)) + +NumberOfFeatures: 13 + +Here is one of our loaded model's features: + + [[ 0.00000000e+00 -1.05416344e-01 5.25259886e-02 4.26844883e-03 + 2.76607367e+00 -2.82517989e+00 5.49968304e+00 3.48833264e-03 + -8.73247684e-01 1.74005447e-01 -8.38917510e-03 -3.28044397e-01 + 1.58423529e-02 -4.57542900e-01]] +[CODE_SAMPLE_COMPLETED_SUCCESFULLY] +``` + + +### Running the Sample as a Python File + +Open the notebook in Jupyter and download it as a Python file + +![Download as python file in the 
Jupyter Notebook](Jupyter_Save_Py.jpg "Download as python file in the Jupyter Notebook") + +Run the Program + +`python daal4py_GettingStarted.py` + +The output files of the script will be saved in the included models and results directories. + +##### Expected Printed Output (with similar numbers): +``` +Here's our model: + + + NumberOfBetas: 14 + +NumberOfResponses: 1 + +InterceptFlag: False + +Beta: array( + [[ 0.00000000e+00 -1.05416344e-01 5.25259886e-02 4.26844883e-03 + 2.76607367e+00 -2.82517989e+00 5.49968304e+00 3.48833264e-03 + -8.73247684e-01 1.74005447e-01 -8.38917510e-03 -3.28044397e-01 + 1.58423529e-02 -4.57542900e-01]], + dtype=float64, shape=(1, 14)) + +NumberOfFeatures: 13 + +Here is one of our loaded model's features: + + [[ 0.00000000e+00 -1.05416344e-01 5.25259886e-02 4.26844883e-03 + 2.76607367e+00 -2.82517989e+00 5.49968304e+00 3.48833264e-03 + -8.73247684e-01 1.74005447e-01 -8.38917510e-03 -3.28044397e-01 + 1.58423529e-02 -4.57542900e-01]] +[CODE_SAMPLE_COMPLETED_SUCCESFULLY] +``` + diff --git a/Libraries/oneDAL/daal4py_Getting_Started/daal4py_GettingStarted.ipynb b/Libraries/oneDAL/daal4py_Getting_Started/daal4py_GettingStarted.ipynb new file mode 100755 index 0000000000..521b43af6b --- /dev/null +++ b/Libraries/oneDAL/daal4py_Getting_Started/daal4py_GettingStarted.ipynb @@ -0,0 +1,252 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# =============================================================\n", + "# Copyright © 2020 Intel Corporation\n", + "# \n", + "# SPDX-License-Identifier: MIT\n", + "# =============================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# IntelPython Getting Started Example for Shared Memory Systems" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Importing and Organizing Data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "In this example we will be predicting **prices of houses in Boston** based on the features of each house.\n", + "\n", + "Let's start by **importing** all necessary data and packages." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "##### Linear regression example for shared memory systems #####\n", + "import daal4py as d4p\n", + "from sklearn.datasets import load_boston\n", + "from sklearn.model_selection import train_test_split\n", + "import pandas as pd\n", + "import numpy as np\n", + "import pickle" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's **load** in the dataset and **organize** it as necessary to work with our model." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# loading in the data\n", + "data = load_boston()\n", + "\n", + "# organizing variables used in the model for prediction\n", + "X = data.data # house characteristics\n", + "y = data.target[np.newaxis].T # house price\n", + "\n", + "# splitting the data for training and testing, with a 25% test dataset size\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state =1693)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training and Saving the Model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's **train our model** and look at the model's features!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# training the model for prediction\n", + "train_result = d4p.linear_regression_training().compute(X_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To **get training model information** and **save it to a file**:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Here's our model:\n", + "\n", + "\n", + " NumberOfBetas: 14\n", + "\n", + "NumberOfResponses: 1\n", + "\n", + "InterceptFlag: False\n", + "\n", + "Beta: array(\n", + " [[ 0.00000000e+00 -1.05416344e-01 5.25259886e-02 4.26844883e-03\n", + " 2.76607367e+00 -2.82517989e+00 5.49968304e+00 3.48833264e-03\n", + " -8.73247684e-01 1.74005447e-01 -8.38917510e-03 -3.28044397e-01\n", + " 1.58423529e-02 -4.57542900e-01]],\n", + " dtype=float64, shape=(1, 14))\n", + "\n", + "NumberOfFeatures: 13 \n", + "\n" + ] + } + ], + "source": [ + "# retrieving and printing training model\n", + "model = train_result.model\n", + "print(\"Here's our model:\\n\\n\\n\", model , \"\\n\")\n", + "\n", + "model_filename = './models/linear_regression_batch.sav'\n", + "\n", + "# saving model to a file\n", + "pickle.dump(model, open(model_filename, \"wb\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's **load up the model** and look at one of the model's features." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Here is one of our loaded model's features: \n", + "\n", + " [[ 0.00000000e+00 -1.05416344e-01 5.25259886e-02 4.26844883e-03\n", + " 2.76607367e+00 -2.82517989e+00 5.49968304e+00 3.48833264e-03\n", + " -8.73247684e-01 1.74005447e-01 -8.38917510e-03 -3.28044397e-01\n", + " 1.58423529e-02 -4.57542900e-01]]\n" + ] + } + ], + "source": [ + "# loading the training model from a file\n", + "loaded_model = pickle.load(open(model_filename, \"rb\"))\n", + "print(\"Here is one of our loaded model's features: \\n\\n\", loaded_model.Beta)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Making a Prediction and Saving the Results" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Time to **make a prediction!**" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# now predicting the target feature(s) using the trained model\n", + "y_pred = d4p.linear_regression_prediction().compute(X_test, loaded_model).prediction " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's **export the results to a CSV file**." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[CODE_SAMPLE_COMPLETED_SUCCESFULLY]\n" + ] + } + ], + "source": [ + "np.savetxt(\"./results/linear_regression_batch_results.csv\", y_pred, delimiter = \",\")\n", + "print(\"[CODE_SAMPLE_COMPLETED_SUCCESFULLY]\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Libraries/oneDAL/daal4py_Getting_Started/daal4py_GettingStarted.py b/Libraries/oneDAL/daal4py_Getting_Started/daal4py_GettingStarted.py new file mode 100755 index 0000000000..1719881fe8 --- /dev/null +++ b/Libraries/oneDAL/daal4py_Getting_Started/daal4py_GettingStarted.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python +# coding: utf-8 + +# In[1]: + + +''' +============================================================= +Copyright © 2020 Intel Corporation + +SPDX-License-Identifier: MIT +============================================================= +''' + +# # IntelPython Getting Started Example for Shared Memory Systems + +# ## Importing and Organizing Data + +# In this example we will be predicting **prices of houses in Boston** based on the features of each house. +# +# Let's start by **importing** all necessary data and packages. 
+ +# In[2]: + + +##### Linear regression example for shared memory systems ##### +import daal4py as d4p +from sklearn.datasets import load_boston +from sklearn.model_selection import train_test_split +import pandas as pd +import numpy as np +import pickle + + +# Now let's **load** in the dataset and **organize** it as necessary to work with our model. + +# In[3]: + + +# loading in the data +data = load_boston() + +# organizing variables used in the model for prediction +X = data.data # house characteristics +y = data.target[np.newaxis].T # house price + +# splitting the data for training and testing, with a 25% test dataset size +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state =1693) + + +# ## Training and Saving the Model + +# Let's **train our model** and look at the model's features! + +# In[4]: + + +# training the model for prediction +train_result = d4p.linear_regression_training().compute(X_train, y_train) + + +# To **get training model information** and **save it to a file**: + +# In[5]: + + +# retrieving and printing training model +model = train_result.model +print("Here's our model:\n\n\n", model , "\n") + +model_filename = './models/linear_regression_batch.sav' + +# saving model to a file +pickle.dump(model, open(model_filename, "wb")) + + +# Now let's **load up the model** and look at one of the model's features. + +# In[6]: + + +# loading the training model from a file +loaded_model = pickle.load(open(model_filename, "rb")) +print("Here is one of our loaded model's features: \n\n", loaded_model.Beta) + + +# ## Making a Prediction and Saving the Results + +# Time to **make a prediction!** + +# In[7]: + + +# now predicting the target feature(s) using the trained model +y_pred = d4p.linear_regression_prediction().compute(X_test, loaded_model).prediction + + +# Now let's **export the results to a CSV file**. 
+ +# In[8]: + + +np.savetxt("./results/linear_regression_batch_results.csv", y_pred, delimiter = ",") +print("[CODE_SAMPLE_COMPLETED_SUCCESFULLY]") + diff --git a/Libraries/oneDAL/daal4py_Getting_Started/models/store_models_in_this_folder.txt b/Libraries/oneDAL/daal4py_Getting_Started/models/store_models_in_this_folder.txt new file mode 100755 index 0000000000..e69de29bb2 diff --git a/Libraries/oneDAL/daal4py_Getting_Started/results/store_results_in_this_folder.txt b/Libraries/oneDAL/daal4py_Getting_Started/results/store_results_in_this_folder.txt new file mode 100755 index 0000000000..e69de29bb2 diff --git a/Libraries/oneDAL/daal4py_Getting_Started/sample.json b/Libraries/oneDAL/daal4py_Getting_Started/sample.json new file mode 100755 index 0000000000..2a255231ce --- /dev/null +++ b/Libraries/oneDAL/daal4py_Getting_Started/sample.json @@ -0,0 +1,22 @@ +{ + "guid": "2E6A2E22-035F-493B-B471-DFD8CF8F8256", + "name": "daal4py Getting Started", + "categories": ["Toolkit/Intel® AI Analytics Toolkit/oneDAL"], + "description": "This Getting Started sample code shows how to do batch linear regression using the python API package daal4py for oneDAL", + "builder": ["cli"], + "languages": [{"python":{}}], + "dependencies": ["oneDAL"], + "os":["linux"], + "targetDevice": ["CPU"], + "ciTests": { + "linux": [ + { + "env": ["source /opt/intel/oneapi/setvars.sh --force", "source activate base"], + "id": "d4p_GS_py", + "steps": [ + "python daal4py_GettingStarted.py" + ] + } + ] +} +} From add6cc7c27952c4274a21a06c5eec769cbf25cfe Mon Sep 17 00:00:00 2001 From: Andrey <41368386+andrey4latyshev@users.noreply.github.com> Date: Mon, 24 Aug 2020 19:46:28 +0300 Subject: [PATCH 14/17] Add AWS IoT Code Sample (#18) * Sample .cpp, cmake and info files added * Update README.md * Sample.json added * Deleted extra files * Info files fixes * Update README.md * Update sample.json * Copyright and GUID fixed * Update sample.json --- .../aws-pub-sub/CMakeLists.txt | 82 ++++ 
.../aws-pub-sub/License.txt | 7 + .../IoTConnectionTools/aws-pub-sub/README.md | 69 ++++ .../aws-pub-sub/cpp/main.cpp | 374 ++++++++++++++++++ .../aws-pub-sub/sample.json | 22 ++ 5 files changed, 554 insertions(+) create mode 100644 Tools/IoTConnectionTools/aws-pub-sub/CMakeLists.txt create mode 100644 Tools/IoTConnectionTools/aws-pub-sub/License.txt create mode 100644 Tools/IoTConnectionTools/aws-pub-sub/README.md create mode 100644 Tools/IoTConnectionTools/aws-pub-sub/cpp/main.cpp create mode 100644 Tools/IoTConnectionTools/aws-pub-sub/sample.json diff --git a/Tools/IoTConnectionTools/aws-pub-sub/CMakeLists.txt b/Tools/IoTConnectionTools/aws-pub-sub/CMakeLists.txt new file mode 100644 index 0000000000..89690f2a2a --- /dev/null +++ b/Tools/IoTConnectionTools/aws-pub-sub/CMakeLists.txt @@ -0,0 +1,82 @@ +cmake_minimum_required(VERSION 3.1) +project(basic-pub-sub CXX) + +option(BUILD_DEPS "Builds aws common runtime dependencies as part of build to control your dependency chain." ON) + +if (DEFINED CMAKE_PREFIX_PATH) + file(TO_CMAKE_PATH "${CMAKE_PREFIX_PATH}" CMAKE_PREFIX_PATH) +endif() + +if (DEFINED CMAKE_INSTALL_PREFIX) + file(TO_CMAKE_PATH "${CMAKE_INSTALL_PREFIX}" CMAKE_INSTALL_PREFIX) +endif() + +if (NOT CMAKE_CXX_STANDARD) + set(CMAKE_CXX_STANDARD 11) +endif() + +if (UNIX AND NOT APPLE) + include(GNUInstallDirs) +elseif(NOT DEFINED CMAKE_INSTALL_LIBDIR) + set(CMAKE_INSTALL_LIBDIR "lib") +endif() + +if (${CMAKE_INSTALL_LIBDIR} STREQUAL "lib64") + set(FIND_LIBRARY_USE_LIB64_PATHS true) +endif() + +# This is required in order to append /lib/cmake to each element in CMAKE_PREFIX_PATH +set(AWS_MODULE_DIR "/${CMAKE_INSTALL_LIBDIR}/cmake") +string(REPLACE ";" "${AWS_MODULE_DIR};" AWS_MODULE_PATH "${CMAKE_PREFIX_PATH}${AWS_MODULE_DIR}") +# Append that generated list to the module search path +list(APPEND CMAKE_MODULE_PATH ${AWS_MODULE_PATH}) + +if (NOT DEFINED CMAKE_BUILD_TYPE) + if (NOT WIN32) + set(CMAKE_BUILD_TYPE "RelWithDebInfo") + endif() +endif() + +list(APPEND 
CMAKE_MODULE_PATH "$ENV{HOME}/sdk-cpp-workspace/aws-iot-device-sdk-cpp-v2/aws-common-runtime/aws-crt-cpp/aws-common-runtime/aws-c-common/cmake") + +include(AwsFindPackage) +set(IN_SOURCE_BUILD ON) +set(BUILD_TESTING_PREV ${BUILD_TESTING}) +set(BUILD_TESTING OFF) + +find_path(CRT_CPP_LIB aws-crt-cpp PATHS "$ENV{HOME}/sdk-cpp-workspace/aws-iot-device-sdk-cpp-v2/aws-common-runtime") +set(CRT_PATH ${CRT_CPP_LIB}/aws-crt-cpp) +add_subdirectory(${CRT_PATH} build EXCLUDE_FROM_ALL) +set(BUILD_TESTING ${BUILD_TESTING_PREV}) + + +list(APPEND CMAKE_MODULE_PATH "${CMAKE_INSTALL_PREFIX}/lib/cmake") + +file(GLOB PUB_SUB_SRC + "cpp/*.cpp" +) + +set(PUB_SUB_PROJECT_NAME basic-pub-sub) +add_executable(${PUB_SUB_PROJECT_NAME} ${PUB_SUB_SRC}) +set_target_properties(${PUB_SUB_PROJECT_NAME} PROPERTIES LINKER_LANGUAGE CXX) + +set(CMAKE_C_FLAGS_DEBUGOPT "") + +#set warnings +if (MSVC) + target_compile_options(${PUB_SUB_PROJECT_NAME} PRIVATE /W4 /WX /wd4068) +else () + target_compile_options(${PUB_SUB_PROJECT_NAME} PRIVATE -Wall -Wno-long-long -pedantic -Werror) +endif () + +if (CMAKE_BUILD_TYPE STREQUAL "" OR CMAKE_BUILD_TYPE MATCHES Debug) + target_compile_definitions(${PUB_SUB_PROJECT_NAME} PRIVATE "-DDEBUG_BUILD") +endif () + +target_include_directories(${PUB_SUB_PROJECT_NAME} PUBLIC + $ + $) + +aws_use_package(aws-crt-cpp) +target_link_libraries(${PUB_SUB_PROJECT_NAME} ${DEP_AWS_LIBS}) + diff --git a/Tools/IoTConnectionTools/aws-pub-sub/License.txt b/Tools/IoTConnectionTools/aws-pub-sub/License.txt new file mode 100644 index 0000000000..e63c6e13dc --- /dev/null +++ b/Tools/IoTConnectionTools/aws-pub-sub/License.txt @@ -0,0 +1,7 @@ +Copyright Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the 
Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/Tools/IoTConnectionTools/aws-pub-sub/README.md b/Tools/IoTConnectionTools/aws-pub-sub/README.md new file mode 100644 index 0000000000..1db51d3803 --- /dev/null +++ b/Tools/IoTConnectionTools/aws-pub-sub/README.md @@ -0,0 +1,69 @@ +# `AWS Pub Sub` Sample + +`AWS Pub Sub` is a sample that could be used for a quick test of Amazon cloud libraries. + + +| Optimized for | Description +|:--- |:--- +| OS | Linux* Ubuntu* 16.04, Linux* Ubuntu* 18.04 +| Software | C++ 11 or higher, CMake 3.1+, Clang 3.9+ or GCC 4.4+, AWS IoT Device SDK C++ v2 +| What you will learn | Use the Message Broker for AWS IoT to send and receive messages through an MQTT connection + + +This version of the sample has been tested on Ubuntu Linux. This sample requires additional system configuration when using Ubuntu OS. Instructions on how to install all of the required dependency libraries for Linux can be [found here](). + +## Purpose +`AWS Pub Sub` is a simple program that helps the user execute the AWS example code and configure and run Amazon Cloud services. + +## Key Implementation Details +This sample uses the Message Broker for AWS IoT to send and receive messages through an MQTT connection. 
+ +## License +This sample is licensed under Apache License v2.0 + +## Building the `AWS Pub Sub` + +### On a Linux System + +Perform the following steps: +1. Run in the terminal: +``` +cd $HOME +mkdir sdk-cpp-workspace +cd sdk-cpp-workspace +git clone --recursive https://github.com/aws/aws-iot-device-sdk-cpp-v2.git +mkdir aws-iot-device-sdk-cpp-v2-build +cd aws-iot-device-sdk-cpp-v2-build +cmake -DCMAKE_INSTALL_PREFIX="<absolute path to sdk-cpp-workspace>" -DCMAKE_PREFIX_PATH="<absolute path to sdk-cpp-workspace>" -DBUILD_DEPS=ON ../aws-iot-device-sdk-cpp-v2 +cmake --build . --target install +``` + +2. To execute the built sample, run in the terminal: +``` +basic-pub-sub --endpoint <endpoint> --cert <path to cert> --key <path to key> --topic <topic> --ca_file <path to custom CA> --use_websocket --signing_region <region> --proxy_host <host> --proxy_port <port> +``` + +3. Clean the program using: + +``` +make clean +``` + + +## Running the Sample +### Application Parameters + +endpoint: the endpoint of the mqtt server not including a port +cert: path to your client certificate in PEM format. If this is not set you must specify use_websocket +key: path to your key in PEM format. If this is not set you must specify use_websocket +topic: topic to publish, subscribe to. +client_id: client id to use (optional) +ca_file: Optional, if the mqtt server uses a certificate that's not already in your trust store, set this. + It's the path to a CA file in PEM format +use_websocket: if specified, uses a websocket over https (optional) +signing_region: used for the websocket signer; it should only be specified if websockets are used. (required for websockets) +proxy_host: if you want to use a proxy with websockets, specify the host here (optional). +proxy_port: defaults to 8080 if proxy_host is set. Set this to any value you'd like (optional). 
+ +### Example of Output +TBD diff --git a/Tools/IoTConnectionTools/aws-pub-sub/cpp/main.cpp b/Tools/IoTConnectionTools/aws-pub-sub/cpp/main.cpp new file mode 100644 index 0000000000..72d262bcc5 --- /dev/null +++ b/Tools/IoTConnectionTools/aws-pub-sub/cpp/main.cpp @@ -0,0 +1,374 @@ +/** + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0. + */ +#include +#include + +#include + +#include +#include +#include +#include +#include + +using namespace Aws::Crt; + +static void s_printHelp() +{ + fprintf(stdout, "Usage:\n"); + fprintf( + stdout, + "basic-pub-sub --endpoint --cert " + " --key --topic --ca_file " + " --use_websocket --signing_region --proxy_host --proxy_port \n\n"); + fprintf(stdout, "endpoint: the endpoint of the mqtt server not including a port\n"); + fprintf( + stdout, + "cert: path to your client certificate in PEM format. If this is not set you must specify use_websocket\n"); + fprintf(stdout, "key: path to your key in PEM format. If this is not set you must specify use_websocket\n"); + fprintf(stdout, "topic: topic to publish, subscribe to.\n"); + fprintf(stdout, "client_id: client id to use (optional)\n"); + fprintf( + stdout, + "ca_file: Optional, if the mqtt server uses a certificate that's not already" + " in your trust store, set this.\n"); + fprintf(stdout, "\tIt's the path to a CA file in PEM format\n"); + fprintf(stdout, "use_websocket: if specified, uses a websocket over https (optional)\n"); + fprintf( + stdout, + "signing_region: used for websocket signer it should only be specific if websockets are used. (required for " + "websockets)\n"); + fprintf(stdout, "proxy_host: if you want to use a proxy with websockets, specify the host here (optional).\n"); + fprintf( + stdout, "proxy_port: defaults to 8080 is proxy_host is set. 
Set this to any value you'd like (optional).\n\n"); +} + +bool s_cmdOptionExists(char **begin, char **end, const String &option) +{ + return std::find(begin, end, option) != end; +} + +char *s_getCmdOption(char **begin, char **end, const String &option) +{ + char **itr = std::find(begin, end, option); + if (itr != end && ++itr != end) + { + return *itr; + } + return 0; +} + +int main(int argc, char *argv[]) +{ + + /************************ Setup the Lib ****************************/ + /* + * Do the global initialization for the API. + */ + ApiHandle apiHandle; + + String endpoint; + String certificatePath; + String keyPath; + String caFile; + String topic; + String clientId(Aws::Crt::UUID().ToString()); + String signingRegion; + String proxyHost; + uint16_t proxyPort(8080); + + bool useWebSocket = false; + + /*********************** Parse Arguments ***************************/ + if (!(s_cmdOptionExists(argv, argv + argc, "--endpoint") && s_cmdOptionExists(argv, argv + argc, "--topic"))) + { + s_printHelp(); + return 0; + } + + endpoint = s_getCmdOption(argv, argv + argc, "--endpoint"); + + if (s_cmdOptionExists(argv, argv + argc, "--key")) + { + keyPath = s_getCmdOption(argv, argv + argc, "--key"); + } + + if (s_cmdOptionExists(argv, argv + argc, "--cert")) + { + certificatePath = s_getCmdOption(argv, argv + argc, "--cert"); + } + + topic = s_getCmdOption(argv, argv + argc, "--topic"); + if (s_cmdOptionExists(argv, argv + argc, "--ca_file")) + { + caFile = s_getCmdOption(argv, argv + argc, "--ca_file"); + } + if (s_cmdOptionExists(argv, argv + argc, "--client_id")) + { + clientId = s_getCmdOption(argv, argv + argc, "--client_id"); + } + if (s_cmdOptionExists(argv, argv + argc, "--use_websocket")) + { + if (!s_cmdOptionExists(argv, argv + argc, "--signing_region")) + { + s_printHelp(); + } + useWebSocket = true; + signingRegion = s_getCmdOption(argv, argv + argc, "--signing_region"); + + if (s_cmdOptionExists(argv, argv + argc, "--proxy_host")) + { + proxyHost = 
s_getCmdOption(argv, argv + argc, "--proxy_host"); + } + + if (s_cmdOptionExists(argv, argv + argc, "--proxy_port")) + { + proxyPort = static_cast(atoi(s_getCmdOption(argv, argv + argc, "--proxy_port"))); + } + } + + /********************** Now Setup an Mqtt Client ******************/ + /* + * You need an event loop group to process IO events. + * If you only have a few connections, 1 thread is ideal + */ + Io::EventLoopGroup eventLoopGroup(1); + if (!eventLoopGroup) + { + fprintf( + stderr, "Event Loop Group Creation failed with error %s\n", ErrorDebugString(eventLoopGroup.LastError())); + exit(-1); + } + + Aws::Crt::Io::DefaultHostResolver defaultHostResolver(eventLoopGroup, 1, 5); + Io::ClientBootstrap bootstrap(eventLoopGroup, defaultHostResolver); + + if (!bootstrap) + { + fprintf(stderr, "ClientBootstrap failed with error %s\n", ErrorDebugString(bootstrap.LastError())); + exit(-1); + } + + Aws::Iot::MqttClientConnectionConfigBuilder builder; + + if (!certificatePath.empty() && !keyPath.empty()) + { + builder = Aws::Iot::MqttClientConnectionConfigBuilder(certificatePath.c_str(), keyPath.c_str()); + } + else if (useWebSocket) + { + Aws::Iot::WebsocketConfig config(signingRegion, &bootstrap); + + if (!proxyHost.empty()) + { + Aws::Crt::Http::HttpClientConnectionProxyOptions proxyOptions; + proxyOptions.HostName = proxyHost; + proxyOptions.Port = proxyPort; + proxyOptions.AuthType = Aws::Crt::Http::AwsHttpProxyAuthenticationType::None; + config.ProxyOptions = std::move(proxyOptions); + } + + builder = Aws::Iot::MqttClientConnectionConfigBuilder(config); + } + else + { + s_printHelp(); + } + + if (!caFile.empty()) + { + builder.WithCertificateAuthority(caFile.c_str()); + } + + builder.WithEndpoint(endpoint); + + auto clientConfig = builder.Build(); + + if (!clientConfig) + { + fprintf( + stderr, + "Client Configuration initialization failed with error %s\n", + ErrorDebugString(clientConfig.LastError())); + exit(-1); + } + + Aws::Iot::MqttClient 
mqttClient(bootstrap); + /* + * Since no exceptions are used, always check the bool operator + * when an error could have occurred. + */ + if (!mqttClient) + { + fprintf(stderr, "MQTT Client Creation failed with error %s\n", ErrorDebugString(mqttClient.LastError())); + exit(-1); + } + + /* + * Now create a connection object. Note: This type is move only + * and its underlying memory is managed by the client. + */ + auto connection = mqttClient.NewConnection(clientConfig); + + if (!connection) + { + fprintf(stderr, "MQTT Connection Creation failed with error %s\n", ErrorDebugString(mqttClient.LastError())); + exit(-1); + } + + /* + * In a real world application you probably don't want to enforce synchronous behavior + * but this is a sample console application, so we'll just do that with a condition variable. + */ + std::mutex mutex; + std::condition_variable conditionVariable; + bool connectionSucceeded = false; + bool connectionClosed = false; + bool connectionCompleted = false; + + /* + * This will execute when an mqtt connect has completed or failed. + */ + auto onConnectionCompleted = [&](Mqtt::MqttConnection &, int errorCode, Mqtt::ReturnCode returnCode, bool) { + if (errorCode) + { + fprintf(stdout, "Connection failed with error %s\n", ErrorDebugString(errorCode)); + std::lock_guard lockGuard(mutex); + connectionSucceeded = false; + } + else + { + fprintf(stdout, "Connection completed with return code %d\n", returnCode); + connectionSucceeded = true; + } + { + std::lock_guard lockGuard(mutex); + connectionCompleted = true; + } + conditionVariable.notify_one(); + }; + + auto onInterrupted = [&](Mqtt::MqttConnection &, int error) { + fprintf(stdout, "Connection interrupted with error %s\n", ErrorDebugString(error)); + }; + + auto onResumed = [&](Mqtt::MqttConnection &, Mqtt::ReturnCode, bool) { fprintf(stdout, "Connection resumed\n"); }; + + /* + * Invoked when a disconnect message has completed. 
+ */ + auto onDisconnect = [&](Mqtt::MqttConnection &) { + { + fprintf(stdout, "Disconnect completed\n"); + std::lock_guard lockGuard(mutex); + connectionClosed = true; + } + conditionVariable.notify_one(); + }; + + connection->OnConnectionCompleted = std::move(onConnectionCompleted); + connection->OnDisconnect = std::move(onDisconnect); + connection->OnConnectionInterrupted = std::move(onInterrupted); + connection->OnConnectionResumed = std::move(onResumed); + + connection->SetOnMessageHandler([](Mqtt::MqttConnection &, const String &topic, const ByteBuf &payload) { + fprintf(stdout, "Generic Publish received on topic %s, payload:\n", topic.c_str()); + fwrite(payload.buffer, 1, payload.len, stdout); + fprintf(stdout, "\n"); + }); + + /* + * Actually perform the connect dance. + * This will use default ping behavior of 1 hour and 3 second timeouts. + * If you want different behavior, those arguments go into slots 3 & 4. + */ + fprintf(stdout, "Connecting...\n"); + if (!connection->Connect(clientId.c_str(), false, 1000)) + { + fprintf(stderr, "MQTT Connection failed with error %s\n", ErrorDebugString(connection->LastError())); + exit(-1); + } + + std::unique_lock uniqueLock(mutex); + conditionVariable.wait(uniqueLock, [&]() { return connectionCompleted; }); + + if (connectionSucceeded) + { + /* + * This is invoked upon the receipt of a Publish on a subscribed topic. + */ + auto onPublish = [&](Mqtt::MqttConnection &, const String &topic, const ByteBuf &byteBuf) { + fprintf(stdout, "Publish received on topic %s\n", topic.c_str()); + fprintf(stdout, "\n Message:\n"); + fwrite(byteBuf.buffer, 1, byteBuf.len, stdout); + fprintf(stdout, "\n"); + }; + + /* + * Subscribe for incoming publish messages on topic. 
+ */ + auto onSubAck = [&](Mqtt::MqttConnection &, uint16_t packetId, const String &topic, Mqtt::QOS, int errorCode) { + if (packetId) + { + fprintf(stdout, "Subscribe on topic %s on packetId %d Succeeded\n", topic.c_str(), packetId); + } + else + { + fprintf(stdout, "Subscribe failed with error %s\n", aws_error_debug_str(errorCode)); + } + conditionVariable.notify_one(); + }; + + connection->Subscribe(topic.c_str(), AWS_MQTT_QOS_AT_LEAST_ONCE, onPublish, onSubAck); + conditionVariable.wait(uniqueLock); + + while (true) + { + String input; + fprintf( + stdout, + "Enter the message you want to publish to topic %s and press enter. Enter 'exit' to exit this " + "program.\n", + topic.c_str()); + std::getline(std::cin, input); + + if (input == "exit") + { + break; + } + + ByteBuf payload = ByteBufNewCopy(DefaultAllocator(), (const uint8_t *)input.data(), input.length()); + ByteBuf *payloadPtr = &payload; + + auto onPublishComplete = [payloadPtr](Mqtt::MqttConnection &, uint16_t packetId, int errorCode) { + aws_byte_buf_clean_up(payloadPtr); + + if (packetId) + { + fprintf(stdout, "Operation on packetId %d Succeeded\n", packetId); + } + else + { + fprintf(stdout, "Operation failed with error %s\n", aws_error_debug_str(errorCode)); + } + }; + connection->Publish(topic.c_str(), AWS_MQTT_QOS_AT_LEAST_ONCE, false, payload, onPublishComplete); + } + + /* + * Unsubscribe from the topic. 
+ */ + connection->Unsubscribe( + topic.c_str(), [&](Mqtt::MqttConnection &, uint16_t, int) { conditionVariable.notify_one(); }); + conditionVariable.wait(uniqueLock); + } + + /* Disconnect */ + if (connection->Disconnect()) + { + conditionVariable.wait(uniqueLock, [&]() { return connectionClosed; }); + } + return 0; +} diff --git a/Tools/IoTConnectionTools/aws-pub-sub/sample.json b/Tools/IoTConnectionTools/aws-pub-sub/sample.json new file mode 100644 index 0000000000..0c118243b1 --- /dev/null +++ b/Tools/IoTConnectionTools/aws-pub-sub/sample.json @@ -0,0 +1,22 @@ +{ + "guid": "479AD17C-27E9-42F0-8CB1-14B48D098829", + "name": "AWS Pub Sub", + "categories": ["Toolkit/Intel® oneAPI IoT Toolkit/IoT Connection Tools"], + "description": "This sample uses the Message Broker for AWS IoT to send and receive messages through an MQTT connection.", + "dependencies": ["aws-iot-device-sdk-cpp-v2|https://github.com/aws/aws-iot-device-sdk-cpp-v2"], + "languages": [{"cpp":{}}], + "os": ["linux"], + "ciTests": { + "linux": [ + { "id": "aws-pub-sub", + "env": [], + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make" + ] + } + ] + } +} From 118111190e1fefb271ca861561aef71d29a70415 Mon Sep 17 00:00:00 2001 From: Jessica Davies <65913721+jessicadavies-intel@users.noreply.github.com> Date: Mon, 24 Aug 2020 12:47:13 -0400 Subject: [PATCH 15/17] Add missing A10 option for Visual Studio (#106) Signed-off-by: Jessica Davies --- .../speculated_iterations/speculated_iterations.vcxproj | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/speculated_iterations.vcxproj b/DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/speculated_iterations.vcxproj index 2ec6e32238..7a23ad883f 100755 --- a/DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/speculated_iterations.vcxproj +++ 
b/DirectProgramming/DPC++FPGA/Tutorials/Features/speculated_iterations/speculated_iterations.vcxproj @@ -104,7 +104,7 @@ true pch.h true - -DFPGA_EMULATOR %(AdditionalOptions) + -DFPGA_EMULATOR -DA10 %(AdditionalOptions) $(IntDir)speculated_iterations.obj $(ONEAPI_ROOT)dev-utilities\latest\include @@ -144,7 +144,7 @@ true pch.h true - -DFPGA_EMULATOR %(AdditionalOptions) + -DFPGA_EMULATOR -DA10 %(AdditionalOptions) $(IntDir)speculated_iterations.obj $(ONEAPI_ROOT)dev-utilities\latest\include @@ -158,4 +158,4 @@ - \ No newline at end of file + From 290f68cde0a7ac01d0de2b3721bc7408a1cc037c Mon Sep 17 00:00:00 2001 From: ethanhirsch <67659250+ethanhirsch@users.noreply.github.com> Date: Mon, 24 Aug 2020 09:48:00 -0700 Subject: [PATCH 16/17] Update MacOS HPC C++ Samples (#104) * Removing linux compat from MandelbrotOMP Signed-off-by: Ethan Hirsch * Removing linux compat from Intrinsics Signed-off-by: Ethan Hirsch * removing linux compat from mergesortOMP Signed-off-by: Ethan Hirsch --- .../C++/CombinationalLogic/MandelbrotOMP/README.md | 4 ++-- .../C++/CombinationalLogic/MandelbrotOMP/sample.json | 6 +----- .../C++/CompilerInfrastructure/Intrinsics/README.md | 6 +++--- .../C++/CompilerInfrastructure/Intrinsics/sample.json | 8 ++------ .../C++/GraphTraversal/MergesortOMP/README.md | 6 +++--- .../C++/GraphTraversal/MergesortOMP/sample.json | 6 +----- 6 files changed, 12 insertions(+), 24 deletions(-) diff --git a/DirectProgramming/C++/CombinationalLogic/MandelbrotOMP/README.md b/DirectProgramming/C++/CombinationalLogic/MandelbrotOMP/README.md index 63dbe2063b..b7e29823b0 100644 --- a/DirectProgramming/C++/CombinationalLogic/MandelbrotOMP/README.md +++ b/DirectProgramming/C++/CombinationalLogic/MandelbrotOMP/README.md @@ -5,9 +5,9 @@ Mandelbrot is an infinitely complex fractal patterning that is derived from a si | Optimized for | Description |:--- |:--- -| OS | MacOS Catalina or newer; Linux* Ubuntu* 18.04 +| OS | MacOS Catalina or newer; | Hardware | Skylake with GEN9 or 
newer -| Software | Intel® C++ Compiler 19.1 or newer +| Software | Intel® oneAPI C++ Compiler Classic | What you will learn | How to optimize a scalar implementation using OpenMP pragmas | Time to complete | 15 minutes diff --git a/DirectProgramming/C++/CombinationalLogic/MandelbrotOMP/sample.json b/DirectProgramming/C++/CombinationalLogic/MandelbrotOMP/sample.json index c5a3dd649c..9bf6d60004 100644 --- a/DirectProgramming/C++/CombinationalLogic/MandelbrotOMP/sample.json +++ b/DirectProgramming/C++/CombinationalLogic/MandelbrotOMP/sample.json @@ -2,16 +2,12 @@ "name": "Mandelbrot OpenMP*", "description": "Calculates the mandelbrot set and outputs a bmp image representation using OpenMP*", "categories": ["Toolkit/Intel® oneAPI HPC Toolkit"], - "os": ["linux", "darwin"], + "os": ["darwin"], "builder": ["make"], "languages": [{"cpp":{}}], "toolchain": ["icc"], "guid": "DD113F58-4D91-41BB-B46E-6CF2C0D9F6F9", "ciTests": { - "linux": [ - { "id": "standard", "steps": [ "make", "make run", "make clean" ] }, - { "id": "perf_num", "env": [ "export perf_num=1" ], "steps": [ "make", "make run", "make clean" ] } - ], "darwin": [ { "id": "standard", "steps": [ "make", "make run", "make clean" ] }, { "id": "perf_num", "env": [ "export perf_num=1" ], "steps": [ "make", "make run", "make clean" ] } diff --git a/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/README.md b/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/README.md index 50e0f51b90..a99d5b006c 100644 --- a/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/README.md +++ b/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/README.md @@ -4,10 +4,10 @@ The intrinsic samples are designed to show how to utilize the intrinsics support | Optimized for | Description |:--- |:--- -| OS | Linux* Ubuntu* 18.04; MacOS* Catalina* or newer +| OS | MacOS* Catalina* or newer | Hardware | Skylake with GEN9 or newer -| Software | Intel® C++ Compiler 2021.1 or newer; -| What you will learn | How to utlize 
intrinsics supported by the Intel® C++ Compiler +| Software | Intel® oneAPI C++ Compiler Classic +| What you will learn | How to utilize intrinsics supported by the Intel® oneAPI C++ Compiler Classic | Time to complete | 15 minutes diff --git a/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/sample.json b/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/sample.json index 43217b278f..40360e7968 100644 --- a/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/sample.json +++ b/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/sample.json @@ -1,17 +1,13 @@ { "name": "Intrinsics C++", - "description": "Demonstrates the intrinsic functions of the Intel® C++ Compiler", + "description": "Demonstrates the intrinsic functions of the Intel® oneAPI C++ Compiler Classic", "categories": ["Toolkit/Intel® oneAPI HPC Toolkit"], - "os": ["linux", "darwin"], + "os": ["darwin"], "builder": ["make"], "languages": [{"cpp":{}}], "toolchain": ["icc"], "guid": "ACD0E89E-67CC-4CB4-87AB-B12B84962EAF", "ciTests": { - "linux": [ - { "id": "standard", "steps": [ "make", "make run", "make clean" ] }, - { "id": "debug", "steps": [ "make debug", "make debug_run", "make clean" ] } - ], "darwin": [ { "id": "standard", "steps": [ "make", "make run", "make clean" ] }, { "id": "debug", "steps": [ "make debug", "make debug_run", "make clean" ] } diff --git a/DirectProgramming/C++/GraphTraversal/MergesortOMP/README.md b/DirectProgramming/C++/GraphTraversal/MergesortOMP/README.md index ce51161a1a..43356ac52a 100644 --- a/DirectProgramming/C++/GraphTraversal/MergesortOMP/README.md +++ b/DirectProgramming/C++/GraphTraversal/MergesortOMP/README.md @@ -6,10 +6,10 @@ For more details about merge sort algorithm and top-down implementation, please | Optimized for | Description |:--- |:--- -| OS | Linux* Ubuntu* 18.04; MacOS Catalina or newer +| OS | MacOS Catalina or newer | Hardware | Skylake with GEN9 or newer -| Software | Intel® C++ Compiler 19.1 or newer; -| What you will learn | 
How to accelerate a scalar program using OpenMP tasks +| Software | Intel® oneAPI C++ Compiler Classic +| What you will learn | How to accelerate a scalar program using OpenMP* tasks | Time to complete | 15 minutes Performance number tabulation diff --git a/DirectProgramming/C++/GraphTraversal/MergesortOMP/sample.json b/DirectProgramming/C++/GraphTraversal/MergesortOMP/sample.json index 9e89eb23d4..a58affeae8 100644 --- a/DirectProgramming/C++/GraphTraversal/MergesortOMP/sample.json +++ b/DirectProgramming/C++/GraphTraversal/MergesortOMP/sample.json @@ -2,16 +2,12 @@ "name": "MergeSort C++/OpenMP*", "description": "Classic sorting algorithm using OpenMP*", "categories": ["Toolkit/Intel® oneAPI HPC Toolkit"], - "os": ["linux", "darwin"], + "os": ["darwin"], "builder": ["make"], "languages": [{"cpp":{}}], "toolchain": ["icc"], "guid": "5AFED65F-F725-411D-B21C-B59008D1166D", "ciTests": { - "linux": [ - { "id": "standard", "steps": [ "make", "make run", "make clean" ] }, - { "id": "perf_num", "env": [ "export perf_num=1" ], "steps": [ "make", "make run", "make clean" ] } - ], "darwin": [ { "id": "standard", "steps": [ "make", "make run", "make clean" ] }, { "id": "perf_num", "env": [ "export perf_num=1" ], "steps": [ "make", "make run", "make clean" ] } From c60ee44ec895946af82f89eec5f2be4ad74ca6f7 Mon Sep 17 00:00:00 2001 From: JoeOster <52936608+JoeOster@users.noreply.github.com> Date: Mon, 24 Aug 2020 09:52:27 -0700 Subject: [PATCH 17/17] Update Root Readme (#94) * Updating License file to no date in the title /* * Copyright (c) 2020 Intel Corporation * * This program and the accompanying materials are made available under the * terms of the The MIT License which is available at * https://opensource.org/licenses/MIT. 
* * SPDX-License-Identifier: MIT */ * Update README.md * Fix FPGA entries * Update README.md Updates per request of sranikonda * Update README.md Co-authored-by: akertesz <67655634+akertesz@users.noreply.github.com> --- README.md | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 72 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1fe987a98d..ee7e072fe3 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,72 @@ -oneAPI-samples -This is the readme. +|Code Sample |Supported Intel(r) Architecture(s) |Description | +|-----------------------|-------------------------------------------|---------------| +|DirectProgramming/ | +|../DPC++/CombinationalLogic/Mandelbrot |GPU, CPU |Example of a fractal in mathematics | +|../DPC++/CombinationalLogic/Sepia-filter |GPU, CPU |Color image conversion using 1D range | +|../DPC++/DenseLinearAlgebra/Complex_mult |GPU, CPU |Complex number Multiplication | +|../DPC++/DenseLinearAlgebra/Matrix_mul |GPU, CPU |Simple program that multiplies two large matrices in parallel using DPC++, OpenMP and MKL | +|../DPC++/DenseLinearAlgebra/Simple-add |FPGA, GPU, CPU |Simple Add program | +|../DPC++/DenseLinearAlgebra/Vector-add |FPGA, GPU, CPU |Simple Vector add program | +|../DPC++/GraphTraversal/Bitonic-sort |GPU, CPU |Implementation of bitonic sort using DPC++. | +|../DPC++/ParallelPatterns/Dpc_reduce |GPU, CPU |A simple program that calculates pi, implemented using C++ and DPC++. | +|../DPC++/SpectralMethods/Discrete-cosine-transform |GPU, CPU |Image processing algorithm used in JPEG compression | +|../DPC++/StructuredGrids/1d_HeatTransfer |GPU, CPU |A simulation of one dimensional heat transfer process using DPC++. 
| +|../DPC++/StructuredGrids/ISO2DFD_DPCPP |GPU, CPU |A simple finite difference stencil kernel for solving 2D acoustic isotropic wave equation using DPC++ | +|../DPC++/StructuredGrids/ISO3DFD_DPCPP |GPU, CPU |A finite difference stencil kernel for solving 3D acoustic isotropic wave equation using DPC++ | +|../DPC++/StructuredGrids/Particle-diffusion |GPU, CPU |A simple implementation of a Monte Carlo simulation of the diffusion of water molecules in tissue | +|../DPC++FPGA/ReferenceDesigns/crr |FPGA |High-performance CRR binomial tree option pricing model using DPC++ on FPGA| +|../DPC++FPGA/ReferenceDesigns/gzip |FPGA |High-performance GZIP compression using DPC++ on FPGA| +|../DPC++FPGA/ReferenceDesigns/qrd |FPGA |High-performance QR decomposition of matrices using DPC++ on FPGA| +|../DPC++FPGA/Tutorials/GettingStarted/fpga_compile |FPGA |Tutorial introducing how to compile DPC++ for FPGA | +|../DPC++FPGA/Tutorials/GettingStarted/fast_recompile |FPGA |Tutorial introducing host-only recompile to save DPC++ development time on FPGA | +|../DPC++FPGA/Tutorials/Tools/use_library |FPGA |Tutorial showing how to use cross-language libraries in DPC++ on FPGA | +|../DPC++FPGA/Tutorials/Tools/system_profiling |FPGA |Tutorial showing how to use the OpenCL Intercept Layer to profile DPC++ designs running on FPGA | +|../DPC++FPGA/Tutorials/DesignPatterns/double_buffering |FPGA |Tutorial demonstrating how to overlap kernel execution with buffer transfers and host processing | +|../DPC++FPGA/Tutorials/DesignPatterns/n_way_buffering |FPGA |Tutorial demonstrating an extension of double buffering to n-way buffering | +|../DPC++FPGA/Tutorials/DesignPatterns/onchip_memory_cache |FPGA |Tutorial demonstrating the caching of on-chip memory to reduce loop initiation interval on FPGA | +|../DPC++FPGA/Tutorials/DesignPatterns/pipe_array |FPGA |Tutorial demonstrating how to create an array of pipes | +|../DPC++FPGA/Tutorials/DesignPatterns/remove_loop_carried_dependency |FPGA |Tutorial 
demonstrating a technique to optimize performance by removing loop carried dependencies | +|../DPC++FPGA/Tutorials/DesignPatterns/triangular_loop |FPGA |Tutorial demonstrating an advanced FPGA optimization technique for triangular loops | +|../DPC++FPGA/Tutorials/Features/fpga_reg |FPGA |Tutorial demonstrating the use of the DPC++ FPGA power user extension intel::fpga_reg | +|../DPC++FPGA/Tutorials/Features/kernel_args_restrict |FPGA |Tutorial demonstrating how to avoid performance penalties due to kernel argument aliasing | +|../DPC++FPGA/Tutorials/Features/loop_coalesce |FPGA |Tutorial demonstrating the DPC++ FPGA loop_coalesce attribute | +|../DPC++FPGA/Tutorials/Features/loop_ivdep |FPGA |Tutorial demonstrating the use of the loop ivdep attribute | +|../DPC++FPGA/Tutorials/Features/loop_unroll |FPGA |Tutorial demonstrating the DPC++ unroll pragma and its performance trade-offs on FPGA | +|../DPC++FPGA/Tutorials/Features/max_concurrency |FPGA |Tutorial demonstrating the DPC++ FPGA max_concurrency attribute | +|../DPC++FPGA/Tutorials/Features/memory_attributes |FPGA |Tutorial demonstrating how to use DPC++ FPGA memory attributes | +|../DPC++FPGA/Tutorials/Features/pipes |FPGA |Tutorial demonstrating the DPC++ FPGA pipes extension to transfer data between kernels | +|../DPC++FPGA/Tutorials/Features/speculated_iterations |FPGA |Tutorial demonstrating the DPC++ FPGA speculated_iterations attribute | +|../C++/CombinationalLogic/Mandelbrot |CPU |Demonstrates how to accelerate Mandelbrot performance with SIMD and parallelization using OpenMP*. | +|../C++/CompilerInfrastructure/Intrinsics |CPU |Shows how to utilize the intrinsics supported by C++ compiler in a variety of applications. 
| +|../C++/GraphTraversal/Mergesort |CPU |Shows how to accelerate scalar merge sort program using OpenMP tasks | +|Libraries | +|../oneDPL/Gamma-correction |GPU, CPU |gamma correction using Parallel STL | +|../oneDPL/Stable_sort_by_key |GPU, CPU |stable sort by key using counting_iterator and zip_iterator | +|../oneVPL/hello-decode |CPU |shows how to use oneVPL to perform a simple video decode | +|../oneVPL/hello-encode |CPU |shows how to use oneVPL to perform a simple video encode | +|Tools | +|../ApplicationDebugger/Debugger/array-transform |GPU, CPU |Array transform | +|../IoTConnectionTools/Analog-in |CPU |Analog pin input example using Eclipse* MRAA | +|../IoTConnectionTools/Digital In |CPU |GPIO pin input example using Eclipse* MRAA | +|../IoTConnectionTools/Digital Out |CPU |GPIO pin output example using Eclipse* MRAA | +|../IoTConnectionTools/Hello IoT World |CPU |Basic example that prints the compiler used during build | +|../IoTConnectionTools/Interrupt |CPU |Interrupt Service Routine example using Eclipse* MRAA | +|../IoTConnectionTools/Onboard Blink |CPU |Built-in LED blink for common IoT boards using Eclipse* MRAA | +|../IoTConnectionTools/PWM |CPU |Pulse Width Modulation pin output using Eclipse* MRAA | +|../IoTConnectionTools/Up2 LEDs |CPU |Built-in LED example for UP* Squared using Eclipse* MRAA | +|../SystemDebug/System Debug Sample Build |UEFI |Basic example that showcases the features of the Intel® System Debugger | + +# License + +The code samples are licensed under MIT license + +# Known issues or limitations + +## On Windows Platform +1. If you are using Visual Studio 2019, Visual Studio 2019 version 16.4.0 or newer is required. +2. To build samples on Windows, the required Windows SDK is ver. 10.0.17763.0. +3. Now you should be able to build the code sample. +4. For beta, FPGA samples support Windows through FPGA-emulator. +5. 
If you encounter a compilation error like the one below when building a sample program, one reason is that the directory path of the sample is too long; the workaround is to move the sample to a directory like "c:\temp\sample_name". + * Error MSB6003 The specified task executable "dpcpp.exe" could not be run ...... +

X4(P207Ne@GIFLP8!TDsM?r6xVipaplg zSAq@m<5bgQ0W0#8Ld-Etl`ku%<=R0lM`<%R{2URt)RN{GSgUO(0g^wl>bH5Q0=oMX zDG|_zpZLvlp)Y?>f&B0ELbeWF6B_sv!tYCbX_w5zykJxFNdy?~Dilh9LJ>IA{j56!yqf0>&@>cn?SK9jiue3qnv-d!vKb%x3 z$@c(>KM7(wJlL}qK6t6?KIiwjUE@WnAN-WzbF^`q2cSCLsLR#my>QR?Klc>rj7NQ5 zkwNr5xU;SA(~*H*HtY`*Kpfl50Zc67vv+3S$GwmL0IvYRad-vT`OK+9KE2o1ex(h< zE6^G^bAO&-H7(0#!!hM|VM64*iWxL^@}I^S*>cA1w{98AN1Irm_ZvWl6cq;>zV5qp z9c#?l&}sjIRN|{I0=sS@!Rnm{pu&bdFIB#f+x+-k(J?=&f4Yf${gE?b`f-6ZmQ6q) zuv-SDK-vb7=6cRR^%r*cD1vPUU?ljzaI%4zS0Xv0+`A&F3uwJaZNZO0DBvrF(?%Fx z>dKt7F*kN=q!6Ggy$47MB2=O};L(SDg~GqNNO*u^sR1HwB!wzF-o_JKt2b5QFJ9`DYkBPek#9nc~K$8%yAWyGSM}Y4Y}H zV9(qob_CnW31DkRAcprG)|LXqO?TFbB#kZ2&;Ia}(HjcPwtSD6Yw-wTdgD~~lX!d= z^p4o2JMEl^ynSOcq%Ac$Kh$H9-MmqDw7&eLb_-r%LI9QVugjb{vy)wTH#}VkRfoIx z=1#{64q9w^c>rajY~SinF~6yQMwE=j^pyic`qH82jb`{gx5WMqz?no)fh-Abmj@No zs_^N%WQfY{pND8ybyK^&pGVBhZTGFi|GFVcx=>$sD!GW!YdNFhLH}`*4ZO3n_&u+u zg~mLB^jH;_LL4umMbi*iI%$O*y`^cxhYc;$fyoF_LI;&vNv8ZWq<%3m-=I;|QftLV z-t>DRd;hge&SW{%uT`}-&|*Xd2+EhdPW;=9e<9gpQDy3&pvY%<=ap#MKO4!>NXXvh z@Ve-L7uXY6ZzmJ6X+4)bI~oueYXO4WHvX0Fz?Lw@RaQ8Km9Jipfslr#X4f4U@h zj=F58%RQFXy9Ts~WT?bkU7y5hGD?F^M4h?$G_tiOyj|(e>wRB&MJcIBc9NXuc_qw*?Ad&tn%SX#d)Fi?<* z@zBja*E5I?TEFY#tGU8v0^iVMA)lu!ay%GAgs5XqhQ3@mlbHt24OA9Lk;bEGdXjxz z=l1bKy5O41u!^xvdV?}=&CLNk4)0ZF1d*Pizz~}S3JI^-0Z>H(JiI01TA|7k|Gfu+wj&1Ku-#I((iy?7>?I~6C>$VsdD z;ama&wXBq;as^^H<|?|L1@<>LR(H%PS6~r=u!s)#ivlQCpB75a>U+m^QlCkL&3XC7 zFHj`_5+jN2fjF-;0CZFA?@v$5Fe7sK)sa5c;E6F3&QS^g=D-S4rDI><@=KrqcM9t2 zLC}M_&?zn%G)u1bc-y`PLlhW}^(5BpWt+g>*5P%q;7S7mD43sTb0jRN&v~kVCLo>09EeTv0Krs~6{ILNH@>dfdl90Je3Y?Z~*}@IiD}px7 zyCrG&I>}mQaE%!WI^2)yrEpt5Qoi+UR#4K8be`pSys9S6eH?ymJ+sL)Pf-yl-MCKn z{o3#oII*sO$#E)Hg?4ZrCr<4zog{ghwDmtKA$8O{C;EfmrAM(EcX9kYV6jLjPr$l+ z6HG0-_!+go-M!&4fdCIt&DTJur0F{X3|+iv2dWsfflBY+v}Sy~!Vm%DC*wNvs75#B zB3#aZTz<6c^QW)S0KcJtRrd)Nry`?qP#x(9B0li>{cWkKBS8?v)qRo;o`~bI(p(M2SNK#}2aYR2 zATxs}E%8qUY&G@A!vug$40in+JXW^_>44CRCU{c|TvucBX?cpTU7cd=V*Vf*BlyoP zR+^yNVcxbqU7%@_-?seZu%1ZnN2i?T>eC_~=$&Xy&d$YulsL{&<(Y@mf`&`$96M{d 
zQQlr7JqpVnO0!GpqRz)j51-`|?55)+{OfL>AvJav!Xl(IRn7rl-X_?wzSC#5P-T|5TW!K z%ze)HAKn1JRSoHk1JQd#6XOPiD+h zvQY(K44N-5e*IppRxHzAkTVqCH9@=h_6dWK;LDx z0YR#~+noP-t+NPAY|#)v7)vWMGePO|Bm;{C5NTWO^>h8VckvsuJ7{Ym<(4TOsz1;% zL@U02YF4PbG$rdSs)ytRJRoUyTxWOJv;gqcr=f5{X)=%m!g8Gz0`1kizz|3}{Qta3 zT|j+$0U5#t6qeH%6_CU$w9sV6@ZVQGhtW@70eV^&M>1@q)ktin0bhQgjuWbY6l41H zwyHV?cGjHbeNK=<;2EBhLW(lu%z*H$^BgMs@1jG9;g7NB+NMNYz|c>E?gq#im_D&T zt*uh`V<48ks=9R;v>tRxn-=mefPmYoHViv5RmR7^5wB+9yR=PLu#)BCLyiqbgx4I` zo(gzbPSNTlp(+sQE(&=VNNq4xa@6ArOlK4-eTaoOT4S%wm)wlJ3%k%rNU9wK!FSC{ zxG|38$&k74MmFO;Pw7=aLO{ZCWp9)$pppayD$Oja5a#~*YcaOqX7?pcs(_TE@}|nC zZ}&TSP^98-zfU_%_x@7r^eR@Xyl!Cbxbihfc+_oz^BvtT zSkd?i{%bNtqh$54pi+MDw88szkfAG>glLCDNTRk0Ao468&(o~uyZ-0n;mccf0AXjR zq6-SdY!(9$K(`TwrO@Zop*84Xrg9{Qd8J`a7STchJ~O%Vua)|0diMSznfOOv(>jR; z_%(17dyhG0KHGY~YkBVOYIx#^&;Ter{TS%|(YX0FVi^d^v`5KVsPjA_SFhWBTZ<)g zaVqtBWJZ=nMwU%RhD}b6RnA$>L4CbJee9WYnu8ZC&8heW0_bADDK-2y4ZY1A232ey zUo#K!C|hR^BDJFzdsMmYRmzv-96&ZyiY?Ot`|HrHaXXqT@+rxyPwqPjre}lD#5qP=z|sE;tXm4e7P_+n^|{nte9Eqv(x;nT0*1{kZ;u&wHO{-3+sE)g0-UQ=I|bMa zv=IZbfmR2}Ns~z4&tyVbOjnlT2ZIE;I$NoCKq~AXB%chm*)bp3YXjQHL>gn~Z}y$b zl-CPqvr__2;+Z@#K)an#Q+G?gd45H`ZaG1%#9%7#r00Ml6u_sfOZW1I?PF$oP!iOk z18z=p9Iit@mIR#PB(U&ex;~P zB&?A5*?){)mp6a3tRLY#qk=!OK{V|jM_ZhhK5xU*aB|A0o)mwagC_TnJjC@lE(8&s&RP&n&P2q7Nvd|sdtJRd)0F}u` z)GfqXIdc(6>hZpKW7plAq0bsPh=cjLxe`mSw@dDPiDG^=ST%j#=0F(sIn$dTyt(`0 z&(P6}F<#&H!j7#wd!3t}(_D<5r#JnW@T8fe#_0Fm(;2ZVlV8AM@q#xT^382*eNHcd z=kQ`=x!)2;wi8*n18nR}zH4VU-#)p)VPnnD88m9HPF085>iBgKCz3&t)Vwg9~n6M@&?njqjpXNYhV<>PVJ_=Y^Z?< zWLroU#)U8yiw!UBmezkFnwElYesoZ?Pbpb^3o`0wGA^vIul- z3B4!5BIG-xQk$@L!jXDA*|6wIiF3gu(X5$>A0A*T=(YaQhdwQO{}*Pvx8PTpGHcq5 z(8$_AM~@UGpOIFJ0KxRLYO5cZxg|Cl8oon95qH_OgY2G~b;z&9%GpT7eFIW;%Nc6{ zadFlapZ=%k#E9d`D!b=XjpmUO3@NztF3+AUxS^W%Er3n9b}sP8S=mLf)gA~`au2Rd+tl6bL*gJ z35DIkdaSYfi)|_1W7Flp9y08p%x_h7lE>;kLoKA|dex*|LKSGL{cO92rj3(Hwl$8u?2x91%W;FKjDkEI@I@Hp{uk(~o&%H2lbei4huAt8C_8Yc zL%@Ppj=EEvH>-gsO{YpC?r>6EJ4C5;N^Ph_3kl=($^8(WFPYWU5b+IA6pj0&*)j}@ 
zPQ3F~Bev%XExov4nWU+~E=$vv?6zrj3B?$c`&pAh_oKR>{Ko7nOx zE_L2r=g1<_QjV~Qwks=+P`=52AZWyW_M_kg#J#CIzn_m*S+eT#>FJ(y3lxakhjr?B zI<2o{p!`ziVnO$Ls#=MtT|air4UO1gi8rf5gJrL8v!-5O#Fh$f5Ia4ZMWhCSTl?j; z^ZS8Y8#7DMtXIEuoj95>I8A)e{mY^15s+*1S$h#F$0;U=?~UhApSV%hM&i~Um?o(6 zyl^b6mEbE+tlp!-aIi`$w$X7&X>N{$W~pYa%Q^nx&4%fdJ%3MnILK(|lB4vLgu6p6 z)321slyOTH%-JsGaN*WzEmmXIqcbJe1FsR?1p9mqm_w_MD)LFUK9iY@65+v5W9=Rc z7h9iCne+(8&s?ps148USa@^@HSs zebR^-j$jETcz*gcxIMHx^s&4D94?zFS`+^Ho2eJWc>2MwdLPL>*snL zz8m=$e4KDCxtNXj5B5DGRtr8gXio!OCJ!o>gT9-%8li7uatn^YsT!I}CxA;Mw6yC3 zqxP8H$p?o>gw~P>C?NQ1vmC~ctCmg7iVi*e}Y->-+so5Q-c(+x4wmac#e{wwSfMe)1gc3||YVDr=<912ZWIljOl<%g-*Udne zwO~aC7(aI9h(l?XIpDD!*FYOM|0J0?DdPXf(xg#P&woY{$}}}nW1{bqhWKcWPOCj) z|6wZw*K=0S@t#({IY^@E|Lj<&ZOPkpB3;|W;AbJ_#=*aaE9lX&K=QT652-!4q+LuZ zbtIGg?F z%;{p9%D+D`V8{VD-H)>&_RASrIcDVl=|ME-_hAdjp|lY$G=TglcJi#<0)<&Nsu=;; z{Pmb3!Gjjq{TKo+TdplDsjg*-{~|fO{B@3Z>pNzW4+dWaq z14TPRJZy(-KtTKa$9J7|A`rMgLvi0qM#;%D;Nu?vo`3=|s80QL-QI{7QSW-|!;iN& zkUB1M$1iy2EA1I~KLK^g*PeX~tIh0|pN3R_$%Q_*dPHo~SVHIm#GJtXnlUF6MgT*I zbAxcGUiSz!66jMEC>y&+M+qru{WEich(s?yJS-$WKck%xCq*ny?UX_`r|6w7&_w8< z?<9i6#tb`@_@{$Pq3cT&sn{C@uZe&>jAvIvQprWyCtt$4HH+=cT*&aH z!$k&?4iT^7t7#AHQClGvyS>M$cHnpUhocUvMd=pyw08E47U8uHJ^%f5m!-|L8qKX6 zftr1)`cs-#+&#K@OU(})ML8vEd;09zA8r~_2fFt=Jg_`=?8E`)XeY{_m#+F>J$}%5 zyGFEW9Dj~)eEwpzwh=lZy0)g!i^0vnRo!>GdhUlTL1v4m0^cHl@6RfD1G$jAsp`_h ztD>$WZ0zU`*P8pRqjZ8ehp!>%g>NJH2Tf8C-#snhHAnDGNcP58$}S68-pzQu`wcfG z!E5`V+qBnb2Fhpdk>N;7vyX(9_S2(%s$K~8Ed4f=?F*k8vo|fj$24?7`~29PO~C0&sI~6!axa=O2TfLo1TjuBRbauCy_fQisX5%D+gJE z$SZ5bwU=qVwKMYd?25!Twz>E9_sxb2wtjUYc({3AH1k~?xg*_-Lw%xYdlN$Q2EDR7 zc7{LeQfm3U*t_x1_w-h*#=ICDR6@37TxZMI%7>Qn=K}=A`Bvgagg!q{8Yc7O%{xtu zUg|wOKh-IiKILAvLRZ!8i?wlDi{o5S`LufavT^-x;((9jqRW-_ZjXfsyy&9ZqUasz zHl)u%k$E5Y?GDOi`jX{10i5%+mukoZthLKR*MMwaIX#nq|7x1N9+T_l$^Cr=4#bi0 z!XT%G^dvTMoTZc$&pJ>^zcwm2Zx;*Szde?!;mR9dMdQk3zizr2)c4yjb~uli zCBfR^_2Y2^9%I~QWLVe|PxGG1xRy<)se(|YJ?JU1j!)!=VM}a?UjJO<(9Go8^LoyU zd+Wl~dp4dGjnCs|(^7USaWBlqKWoRs05w%z?cWt2ArH&XMtk;4SuJlv$xx%2O4Y<% 
ze@K~W+jg!0-9R@&(Ic6!M9PbV4Ov!k+>}?oxB}y0Ego0{AKLw$Xet)Reqw@RV>s3b zZ|o6(3^RSO-n~u|nM4Cct;I;(t3VjCp0dcS5$<7j8f2c{aC*Qc za&QUPxWd5Y_{*&@_JIa2*m1pDz3=LV(Dj{g!Ff88@4$ZO5SC_n|OM*H6gR)U@5ZbKw|M$lqU-YxeUC$IBp1 zhX&g@I*SIu%O8EmDpEE0@qBN4vhSAr-``9`eCs%E=9(_f3Xe>+2PO_%BqO~|uDuR^ ze*k#}Xx&*<8s7NK4Po)rVfO1VUT0fzml#xz?X({<>HZu>OY)hs_str0WZ%A?JbX_$ z{v6WhIR>US+=;jUr<+vebR!ThWEd<;@zgaun~D!|%#ZBT@9ayPD^1&e!a}UgqkY#` zuG-J1T5Y9zWj?%CV)0PopLezF2xclrk8i5 zmEp6|erYqyWhx}IX~Ix!`;!Zek~rVOc1 z-(=pe`**b3GZ-+Px>h#5nY~|+m!T`G1v3OCip!e5Ma*_?)oS-*F%m*T$E59E2s;mJ zX7r$!w5N~IVmyhMVm+VO2CeKjzD>;|jv8*$p>*h!45^xirWwaNr|B7y)V%5qYN!?~ zV`$x6hz&o_{df8-XM^YaK02=OZlw`}>?-w{D*KPF2ygI4P*WENUFAe4x^5&HVAFIf z>-`+~oC*U6^^V8!c^GzB4idYIMWkKwhj-_vDia(vN3mXv7fp;cXr!6GH~7r0l?*pY z3{RXUU$41M_IK4}_v0co<0xoXJcrBg%M32bNq1nC#kXG(8bZyE1SAoD#xm6n{_Hhw zKIkxw+2}IBH5I?85Ei#z>_*oaJy-IbjaZ9+;NzxZn|t8TqI%akw^k_Zw!K`VY7`wO zGQ$LE3igAWWFjljTz9=|CIerhgKx14cCE1mYsA1*GFQaL$-zN%A5ozjyhNNap!u%7 z$>eFdJ$C5r-$@2;cjvh`edcvqv3AE~*P|?pbP7#3zo+MPSG_ac%*?426klk3nAV0b z+Gym6XyfUBJG3IvRBpF0K=sQoE|9q?w z#lzF@(y$hk+C{y0mVz~_y|hE-1}8WAh2sk?RvcojUrSuHtebJ2&cY~|XhyB_$GWA> z8T1>E2W!@#G5sTa0zp%zBTZmC?(8CInj%$8iFz2X35MFBrUhdjTG64T+!>LgYF4wN z`O5{KffCu5enpD$qQ%fFaX2=geDzUUF_nJfujP71S8dwz4QE8xsxnqwr!;gfO@2AM z&d2@!@6-V_rgRolY1zSHfT*x>2Cw&D@qiFfA_NsBf(9h z?_6iGYg4oxrKOZS9#Vv`u%=4E)faKFNbnNIHym@m{aw5d=7)*zi6#*dne?00t09c| ze(k2--$v-Zi75lEO_|N97$$n$Lb*%r+mnCyxxE9prlyc0VdCgiJdN}1`Il3g_;kbZ z#Hog+{Tls^J@`O|6kZP>s<*;ybWo&cIN8&^~bbR4EHdooJS4- z^X_0l+=M$VSUGS6RMocNuE61oZ1>?w|C|=AvNdhU>3o*s9`KC2D}`+37`T?rIT%lk z)G_^ODD4FUX0|1~h`3T+uUza~tO=*}pz(t10-ASRg!VweEG`pfk&*jJj85Iuw$`1t#Saa{6w-KcTB?DeA-|V?mg(G~gUjs{y2sJ}5)mP5*UZG@%QPa_ z#}Cgl^B(0wY^V#n#w(P-$*fbnjUK`o_kOewA8Yw9( z9?wllU)eC^mO96Uop4tp{cJB>rH2{n$1{eOvKu7bz)CVMG4@#?^~Mmy9sg77E-OnI+z=g!A|WvO`ECCL#;6KX8#uM8q94 zZb3{$dE~_cgTX{pKxXnb$wUl;*Z=YkF%iR%$2y6ah_8^zh5lieX8u3kq?SSr?{nIL z;Hmh4DI&GBQ}|}9qv{p5j0D;a;^1$lcTY#=b~Om)i~fCSuyB@G;~j}F?F{j*K~4r{cIrcxA>hIW-h4O3YC#~!TA>U4#nre;S>IsDPWJGEbs 
zV!nLSxVEEW?2QF%e54Uqv~nYKhw##i33}$AjbW1Jb8HE9t5R3(HXgJQxXB~i^7^=& zxwcN}KvNTDwO`-+*I?;HRnJ5n&2)IZ;+656iW7h4OY8Tzz7vk?g=1#U)(W|BZWJcd zw~>y{Ib-I7Yt(zw8tvH5H+O)+Tswr1XAUD&!Z*DV{4^ zw^G@1>3x)DZbySm_MU4XWOBsV_$n#sgWocjh$P`!**5M zPfJ?RPK$O+g0b!Y-djg7+Jo?cS6kEDqfJW1C&%{Z!->_NZ~0o|jfx`Mod+F~m?YdK zw@t%ioKm0sGgm!0jM(pAS3mm)(MI(JcT^5|^movSY)@J^ad3#NY!6JeyenS9b7cyd z>B`lplh$f$1_OE`x9OS4_IIb%=Z^ETrP1VVJe7`EmrTxHephdU6E+{JK+}z47qC_H3?Z-ZNOvDxviS;JvgL) zb?jXXQ9Ly@ug$M8^XW=P@c6Hl9)`Tl_}WnxXbV2@oi8oB2aFu&_Qyh*bHaI2*g>Eu zPTZVSXZ_i@_4}uPr^H&($XBZ4<0BhK(8G%521b>m9vziWvnz>l7>t>#$&>t#K7PdRd i)&JQS>l~LRQto#co+=dW(nP_3^3uvTa<1Qb_`d)unQ0&Z literal 0 HcmV?d00001 diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/fpga_reg/no_fpga_reg.png b/DirectProgramming/DPC++FPGA/Tutorials/Features/fpga_reg/no_fpga_reg.png new file mode 100755 index 0000000000000000000000000000000000000000..5383063625313f5dc4e163d60a4b8c65cb212459 GIT binary patch literal 84191 zcmeFZbyQS+`!`Am$|#`100Ih#fPi#KNEt}i44|Y+2?$8nFo2RuiPA72J;Wd=4MRzH zcXxLTG4CF{pZmF=^ZxT&=d5$qI`4W|%gxMYf9uN6727~H6**E6EeH<}k5vBo6Ae7P zOJu-5(<=nP9bP&kM&K8|y@s4DUS0?NGVtY+nT)aw9^Q}8tEaDsfbS%>&tKT%;oWq= z{llLyWpKg6!=}kUk%71vtR-C5qtu>hbjDZYUjL%U?Hr}o%raHp+ND0utL3C-kwcrHaD&ar1pm)RBce>)-hrQLkQ#1>z>gO=33o!=52rNv&j&m$FOI)H z<={d8`lN&Z-w6Ry{FjCo^6+1=`J)#96HQ62y_EVfk{=k*jMFQxdu!tF&ms`%%snRS z8@oSq0PTEEh||$~_((=aoH{ZJ;nXwiQl?LzH*Q29PBW!%CV@0AHlQ#TBhu#y!iF)tT8KLO)tHFE~AUg=RBnyCkSBXU*OjC`;Wh6`ea;e8ue%E z(qIW9+-e>dS75_a?6{exI4|ZSQ6o~t$L-DV;tK501%%t-(Z%&S0i;F~cam!Df1M|h z0C%3Mi|ZFW=oTgJ+*{y(o&NuyORFVt%uxpiN^(8==V^qpWCz-}P6{or?z$)|Vq*9H zNr}Exkgo7M_AJihyXP#9Wue%8p8x#`xmkY8wvzVw%}Mh9We2PePBMu9uqD$fhd}Dt ztDUdxxFx%;{2^dJsb#v-g#elUdCWx@yhD0?Xc``!DA!{a^xCbWXJ@6CaF5TQy*vJf zMFp>zvrTz?muabgdGsAF#<(8s+zUpoUw8retpX03&kH=0D2?n?Yq!FdJ$C%`wsNVe zc~t|_Tg5(-CcvJg1D0I6hdaNFONGCNbaDaID!?eO6qxW*f`%`OHSc_Ga$8y2MrZZ-c?pqGuS-)wr z&Cd~NK?}+ai#}R*sBAeKh(G*jtxfqKlR~GTdz4LJg}z@pp}v-^TA4IUjb%DK#AXdb 
zOKeAi8(*{+rCKWqOm>JYMstZ?#%cK_~GH@XQ_l}-`uS_12F%|FhV=)DySW(xbqqTvXZV3~c-_8a# zOG#aJD9GE7_x8~zs|rhogHbu@ocrM5NnIQr@Gkw~7Yg!fG0iyPoJFc~V_`aNZu+Nz zRG9e5q2;gUMFZ}8HhYVa%^{}~S-a~yAKw=5M6t$1vgqz?TFCt8But%Fo+t$YI}xb@ z=xuugg&1drWd{sjHuF*|kBif)-rGmR3L*|?B-1~ie@se>{m;SPyjI&czF)?Aq}2Z! zM_<2ve|rJGX~HLF6mmt%`UX+L#!Oo89{W|++3|i8><-Ny#rr= ze3vBn{0vl@i%oBI!1#DAl^7>fQMw8_tVJ!H>YCd7i7vX-t{`Ynn+;LqMo^~BFT=Jz ztzQkxz?rp%M`mpetquZj|7jPG)WBB|PSMiVOI(jW@1+&%yC@*QVV1&ZUFVM~i-&n= z&zJjORy{e!Pd3ldySuyYi~ffz4zD4voFYbG6Nif$yUMMn&ut?6c01cF&u@*Z(Eih` zRzZFDGZIuo(z{8QJBZ#(_18HU)ONq!T+B)M5`>> zeJXo4ZZV&L&cjEM0wzsF|M2(uX`p}%0-Srz`lHg);8Zf4T&EJ_o&x-5m>7;hVYC;@ zN&t~rxiCT&I8kB*d~?|SaJgHUg((M1u*b(!L6CZguCL^Uo1;{;Yk?pTv+NWxGUcF#r7WK zu6O|#$cFX;2}n!ej?s1fLholT&gzGgG`#ENxZ?_5!d?GQTw<0G8sHvQWrx+jqzndv{}eX!Gtf1{|!#Q`#E*da!j{#xJ` zn!~9fJ?_f)5Z?Yc65Oi8S+dyDC_swt$7f`TD(#!TZWHNN##iGv+zT~YDdHthH0@r; zQr-;8?)%h-4EqxK`zy!YI%5;MC45bxo~+vW zl19X-ATab<+lVT8G!RJz%vU$lT@_B0HBRhXfUeBpB3GNn41Gza`5{UODWD-UiT!9b459d}RdSdO(Aq^J1nMptbk=gg= za3KVq)lUKij1fS+ILz;<5#7x6Ss{dO5d+VbNrSJK5+U)RUBD{9pZYH&r$~Wk9lSXh z?XE)bkxAjw;OF-=e7CQXF&lJLXO+jKR1^LEOc>;?uzPEbVn9@{LpAM!w5>PC^9&Fv zCg*hs!HjR8xoDK=bs$}EPb0I|Cpcq?s`^u^WTz?FCF)Ec5w#c&y^8aI_z7UJbv^6F z;4q~ZZeOkOE1DY4sG|vMh>(m49695#MBJ^tZ;cnXaAY7=Bf696qhpoj(;cl^jAvlH zvSa&a0%@=ZkVeo-0&x%`tNUzUC4rFm$ZA69^{WsWEom?i`M$u1=7TeRTD&-Th4g4L zeJs2yi<^T&Y<|TNYp)adq<~k zd%`v2^8941X~!dZ!yi7Hbgfr232Ky~TP=>ivlM;l^EkN;AOK}_w@W#akh*pINHQqS zjY>j%wQ^rBJCsdL8DWO_SnUS__tv>mh|etH{?00-r)DDy=?kBETX%)|o~fS9OtS2G ziox{*gD{hV1K-YNw7=JiObVu5*-75dx|zifrYF@nzXv7w2r z#hcE@Re=R<4Rm|qy8v6Qkv@Ba;0$iCWK~{?t2z_F!{3WYFC=&U*TQ7~HM%GLySJf?Q#bDmr7AM)jVYr7e%h|7d6*UJ&YRr$F5@ zdr~OI<*B%=!*4OPdFJ5gJ0bM}v%AtYTnLglDgd{W8PA*G%5+xO9OfV%hi+9fBSeuHo^8!%w<{ z$)O3CkG|yN$0qZ{Jan36ZcQCr8JN}O6-BtDyOvQ-9`npbv=%|&a zwXmT;Y_0(e*f!`HDc!Q2Ev}|VKsV^R&XeL<%$g;;5y7Vj( zDa|c7_sn&3!7+6QMwZ`*pyKHIa`$F91NQh)6`3aZ(B^G9NupvtQQsVc$ED{7~1j*gI-}5T| z7>9~<(5pAAl{tgUrxPCNy2RveS0?9OafY_@cL}$|U?e-ISuyR)PET+3{NQuNq6ag1 
z7FK5a8Z@TPe}RzFVM^d81Mr!o<8s+dUs|p6Ab2C9%>6>Jo;KhF>+{fGg0-XRyJjXn zIuQq5oybA)Z%j-lj!2a=l(u$7xrpzXvL0URoFb)maH~WcP`ewEz!ap%M}9R)br)*$ zV1|Uh7YoJ+gi4NfEx8~LAIDXerTn(3E32dy&w6dWCyNn#?9^AN99&n1b+zo(CUb^~ zyzM7nSiI~D72!G7s`3Ao+DHv&IZ-=iK+hwlzjzqYkB zZ)iV*bG;Mw%|0Mkrt+{cZ0EyfZ5Zv4VcqMbq=X_RIj72bxT1at>$~?fI=wfYHH#z> zvy*KX6>sz7Eet>HEjsh@pej)A(6ZlnRmdJF`QrJVv()25Khkv>C&v4k{Mc*3OY90s zpi6HdHE#yN+tgqm)jZBS6`9uoD|c5LuyPrlwl@s>lW|7%nJF^N-myCrPSzFGht~LU zSe3!Ep+&28;$?N~NZ$;jBW;&hy02qdI{iJlvz4_}0yU?yV ziZpm)X}1bqsJ+kSnZ0?Ch)oS*`A+>3{CRCHb2^R|J$%-Au58%t3>&oRCr3$24SlO1 zfhBn0kC#~;rxG2{pdu-b7|{E(h-o{c_@N8Rl0a(CRu!gMWa`ttZ^EP99t~e;{YD(qG7qy+#Tz+~{jCeN!JD%kZU8 z0~>UnW^*>Vx^S4mjfwK?Bk~w4wSNz7DwjAMRaIC%+e00nRVi0vgS*h(0XprIXZts# zw3)T}tWy-DY*rM$rfg%Ghw&38vwb@$@0HA7v3}zZ_?AR1BJ#h@TFoaAKGKMDN0SKR zSd0{dcxO%5Ha!Dhd(Rnr`mN6lJ=jVvvGc?>tCZS3(dyZd=MZLZDdFUW+ZFiM#^_kx zprNOLw07)OSMi-M2yQR9U+YGKd;G|kH=RMEbATHh8C?_z3g9mtZBE9%X!z*gcznpKjq?Q9 z@;Y?G>DBC9t2-!9lLe{z5Pf%;(v+8>LkI=4W~ELhG8@u-I6d^)XSGjU25m5r@KzFG zHgVLZ=hP&>Gd!`^J3fKsEFSKq-#yBH#{EnDg;;BqAyWWw$OHeos;@GAh(eJ*W_g?{ z`W$n%`qMmjtpC7{@E8Dgl@k9J$2tMb*FwGjOo46*lhZ7DY_L% zpMSn15?+BB)izkTBdiyPSr0PPo4_~=?et8dm)fN2SXS_};#91f`7CSP3fkj%j&Cu9 zyWepKcZGS(s?POwFH_T-F6uBym{GV!V9&zW-c5_!8xh|F>QdJ~u|aFDqrW$w5Je#s z2tz3ofB#rijd*v(U-emUMo^EXXKOy_@U=Eix zoOhAc8U(j_&5^aJ140FS;4aYiZ73crKCRYhp3LN*DY5NhmUDTSyQs}=n4}6e>8bxQ z*B@o*njlFfjU;uISJF1))4fyqEg8Q2SOYc+quH{Zj!MtqY)@}gQk4ja5FZS4-sOCO zd9lFPdL%D%z>uhDurs*IA3?D+z;jW9^U;w=2l{Kw-!roFTBo%ZDBAx5Lx^|$dd%pn@a|1=xf&-Las=n^Q6IJ zGE9IOvjz%=to+)(VkRxvl%Hu4y7Q*N8cJF#O2&IEI~>zi#xyYG{S?1FR@C{1{s>$q z;0VcWs#suOlRdkS>hE7YHSAuka)+Y#R--7iJUYvQZW1 z;usyTBZ}xL+gaUP=~0r`Sc$h@d3jDpp(ON=6PC9%3faS}9vCgg`p)GlL~?&~%_*LC z38zoAjQR3SZ~Y6!Mo+FG9(3yNWoXz&+03w*eAr$Y0o3;L(@X-S*kwqKvT+`0Kb8nv zJT8_X$+!iD?F91e3E{sd?{#pqGZ|Qai4Fkx;1n#$hB`-$Xz%^N+lK1TMY@{q#vh%M zpFBN6PwFnJ?hRobEAvJt&muyvrRq}hy6!-BtTA4bmplafv^H2%yK$SqRZ)MqJOQ_S zMQLN^v0t#}1NXZiR8XbGfJc81(UF-s2QQFBP-qT%ann8vcRfxwHPZOTsUdt_{%Kz!RW6Ha6~?aF 
z{IRt|qDkq{3}4r~tG9%$9v*W>WdmmVGLn%hG*h;4YRYNVdWG=(xpkH5=PVOp-2sg$ z*XYRX!Kk}&xrTx<0Fj>|qF7K`CrBdLgX*kajk$*H<4DcJvx&x2sWkw=#~EZ`%h4N6 z)poVvju9Czp9x($UFZ+>dp4>dOY07L3@&Lh&hdFN2xeco=TA8Z7UAIl3iY{7W;p<- zYjEMnbuAm#9hw2F5D!x~p?uC#3pjT1X6g2738@QGc&`#GTk>4c(o%`oJ;pp))V`12U)dcwchqVZVL6pNVhCEB_7N47h zD}wf$RC;K_rHkPAQ$_N`+`@Up zlH(F2V0Cn5M_WclXz9(OH^+4ek(?J_R)ZWIzchaMHv!)22q4QY|I23rB+ua|ta8If*}KEKwy~!9*gG zPkC=KpwHdj?i33wQYR*|6WeD=X)&p)SZt9OZVSVTu4r+mYf`^_c7Q$!f_vIsf(6}O zX}JoW!o@pp>SEg$%vkM>26M$=fl^UjjVMcf+afjEJrbt4N^B5f`bO`I0SDV>**HJP zw(&yLx6(FOY3{!ljf(UD|d|_-UR1#U2KVHyAJFGUQE0lmCv+(67b;qb%r-^OIpNuUe#MT$vvbjUZAU&` z1A_^HS2bpS0aZZNJ{FNH$imYqF5)3qPToY4$+Lf$f8>9e?h0L8a1a+v+wb^E+Zo+$Jg#utE{S?p{WsD+F)Z}Tec__9RatDoi-2cWBzUp z3nTL3a*$_y*bIV%&@-R3YXg~4bS7*r`9NJgq>g3T#+eETaMNf=+Ouz z$IJ<}vg?)h-n3RECO6Ff9Mzuw(FdMz;$VEZ^EuSH!eFF;e0LCRacy`e6gm>&Y`GO0 z^|_du`awwpjH(q8-QL;G+RD*)?W)Law$0rjwB&F>w7E(RTK4sIgSd4>>xDt`gu7g+?Q)qCS-YKAK?4fn>u<4V!Ot@D*91wg#7_ z_6DI3l1~y4Dkz@m!$#IKGXCBx)OV|ppN0#Rvy0O9JQ5kT?mDh~nLc?exSEk~hJv=b7roeekrhzIFcx=f z&uYH7xwT46584oE?YJ#+8z#T2%jnR3LJv zBG08r<>tN0yf7)}#XoX>4T=vPPxyBZ>A?QGg2-5(>B3t~30}(ZnC(+~*K-k45iUc0 z<99Wj#Jl9#chINfEne7b+t?(-=A@YU2L0h^^LI}9)E>es_g%BkTVF4|84VcTSsW*^ zDH?fEVEVjTuI>~&F^FX#R2YaoL#y)@&hEzG3)5$@?j~u9ZaRwUeedBh_CJ$aGyL8* z6ji0ajbJi!s;4@br~2}m-}%wk?Q$*gX2c~AiSJK*19@P@1)H=^H*RMCm73B8kE}YR zefTii)kw7FY5ri}^YM;H!X%b;x5sWney7sV5(7qycy=utvJU#)6oX6Y%fkZg;8q3Q zMJo}lon~_|xBV4WhSA^C4Y?!dIfw4OhS~+G+(|z>&|f>Gq~*?c;i!R_${kjBPy~#Y zGV$e%)CUL8&Er*tuwcWU2r2d(NpN|FEyS%tr>^xN{sWgF>}1)>si)le#|lSzeG>SY zV!Q5M8jm;uQrcD%oLZF^fX=)Qpz8O3F`*M+LPAaZ-&`P?dhthxOR_-r->;F1%(qFj zdv>A%1imcFYat$Z1fC8e8`C$nSi^ZRjAv1_dipOyybp8 z3TA)YF*mFRYemqv@D`86JUySAy|KutRB?cYNFInl@m;6@UwNbl_A>MS z&-^Tq+{Ei7fHGd>VgJ8N@B*1$;y+>PMdtYbu;riqMux$$UQBjgysq6T%a(bs6+W3w zMAXM0hgGd<{t=iaU2u~mO%Q8n)r{C2cciQ8%~CulW0M~ErxSqchJse0GM3QW7s+-= z4oCogXP+ECE_J$(-8!7r*gmxB9n7g7kDQ}AMYVH|FQ>>IJ9A!fg^%~syoewClc}1^ z0UeUJeWu?cfAw8XI~7%`@bdKS!XtTp27@%76QEl`W&t@h=ji((UildYF%Y)vNCbr7 
zNNpl*o8+%A{|Xy_BY7?Q?kA{dV?HD~i>)p0Y)Zb`6Jm1RcFiEhnXf04XojUDl_!(r zH>u$ zq)zbTjzOTgrmVJDQXzpecnl3PBMf4BC z70o7aCV*e>%db1vZH2o zQO^IXw%(GZ^>f!Up+WcR-9?9Vx?yRpefmEj^%Pn&O`??dklM`i7_H$Z1@p>#Vxqsi z>39x5>|M63K%&u=bmR5J1!5NzzYD3c{?#VJ@-F1gIof8DQAE`(;oQ+IJEm|{5dko*r4)DFuEO{*IhZs#{BY~YG9{`&B)K@VT*;egD(WQ(inkv-$AAoW>R7{ z%9;h9IkMi zv$trg0A?H`MFt@x`p3)kpl zdK)ik&LqrB^_}M;3>cCM+a9IpqK&CwSu~No)W|8IoT_1ltKDY$kbeF^d+Cdm$1ZwO zRFA*#Z%P&^0%6+9w;tF_g+m|{jZit1SNkTQN>)j;Vny+y-~Ny10qAN0elVDNW7c!} zPHazYaq;n}_}sOFle5W}zE%5&*l_KP&&8#+ne%(T91nqlaNn&=;H3_v(Koh6wyN7n zgUgEKFJa!2CUh^Le-zZ4!xmy$`twQ54}7dh&nEe8M|>=-w-dEGd5T!uc*%>LWGz50 z{;|$q^FR#P6NGVGR&RP-yX){`!{JHESCfURK}*l#y@NLqr5SY|KyOUU!Q6)srNy8WqvO@Mja=oJW<8-~Y=}3cYjVyXs z$Dt-ZqR2h7cBIMySyLhL=p15@(9vH1E7 ziN54H>GsDc$(525rgEh!E@S8)-$aZw`ZO?J7A}Q)nMDh;3ih!egNEG(;;H-k;1wY& zZCkRHvx%dhHL)l1weFC|Ep%xC(o0(Jid{dw*eX^dTJ+)w6C(tO31CyuqiM8$(WC3D zB$xip%c1Z3Fh!}wGAXCj&|G!3_GC~2A&~A!%a+9kDmDq9pPvixu<^Tl?AE(IdVl3i zk(%L$BKH<$Jo-Q8e)yTZ*!dXso5Fx>t#UWqo|^36E9lgPc{=qVxL_Luo(3>KNi))a z3g~JSv7QHu8`?t~#NxZ(pc*{lov?!eU2$&@L`;54(U$ z_qX4*Q5B(Ib#xkP{56s90P|cdY*t+_N^Aayz=SFGmiL#3wXa}zMUF{yo4#dFadXXf zZ&;&?m4*&`mfE$K(g*H8geQYK@Ns6*^Or}(yQ$pe{?fL$XEu4U>l=oC@GsV8gx2nv zKAqkiVQ%Ib$T+(mG1wD{nw~uI+|fopW-Q9ZE@HDgH<#((E~J`V>PXcq&q7{*Tmx6J zhVKs&EfMUow4A1`cVsS-al8IgwLbn{Ok6f(f6T4Tbv^^%UQ|tdaBc^0Je1^CmPoGm zSB@N!@-=ZJpa9S@&|7?H2-nxhn7vsI^B$Q6QG<75zZMVGKO*`gNlGmDZ{^|vE;+w8+FX|Y5mL10{9%N2tX|$mn^6C z7a;9Jfr3|Jg0&6qMOH0{#rQav-~E>%2YO2o6wv^Zz5f_2Xn%oZwIDJA|3b1T5Z1ld zMsN70UV8sAA*>y`@yvgB^V#3SpPnPTGAFQ~wHvzYRZ>u;N(>24OK11MML$OJ&S9fl>!h6lil8MvbD913KE zS#BcoRZNhocxq_>ZE1`5eEj*tB#=t7BeZMi0{O0J69xZe$kl&v=m|#;no-GTmYWqF z%S)al-h17k>W2Tva{yrW2OW1*9@-$VanGYK!xoE5bx`=!l`E<&DQ3Z|;vbXY=wz+?4niVqv4h!@hOYOgO zB*7}B^ixQ@p54b#^heK{160EBE6b8)QD`7_@O!#$9p}wX4iTA zML`~anv=p*z}H_8{oUpyZSv>o{}=k1fTkc^w-wO)v`he%o}&W1r3^Em;?iIU1JIcS z^uP8H0gH3tT=z2ooV=F<-(gJ;jA+F>BabdG*F<)4EV$nqA@gmWf-n zAR&N()QFxw(L{zN#uQqse!f{~I#}vf=>)Mvr@2@-lC6t#S3T*6LhuPk2F$B}NHrb_Z 
zxl-3ELw`^S9Zdn2xT`5tSfF=lb=S;-kEZ^_@pX{L&UsHSW<6snwObQP`*|%m(ZkDYRLPX7DwGFI8t@dolj%Fb6$huhaBEUZ0!Z>ha)jwH57n15^(9TD zL=xzCj4vw@ln@$5WPf{Hs$FjI9T5T4==t-Gb33mOaTy70KtWAfR2m#4FrfbUy?^uK zj?gfX`m!{brYkkcEeTY`_K3676Ru@<%X8;}zcuMsLWuXPnUz%~65h8$d?V)d?GZab zbI(J1_h)u1L9AL#Y7A;b#QwL}iQ67W_!M|^93?vXZwUBSe;hOFR`&kx#c?GN9=-iS zs_0Vm$3}@XT|$6uMvRBgl$dyz#ZT^zQ{E@d^trAW;geD{qgV`V!YTX_JLnfn~8re%7F`#1NV1eZMHvF9j{GBrG~uCJ82iWKF-z%Mb$H zb)V0A7;imf$cWJZ8YSw-luRVux8p9 zG6A=MzyMGtVn~fh8)>ka0Q6^05z-&DM3-R106D$Il}%6@-$%DJ zWp~r<+8H0j2bgWIAiGV@v}bv6mo=irozCKNaJEk!v7jJ3@E(FsG2h(02(4o*utDn! zL1sxYfkqc>5bwT<5oB;Crtruj=E|<^7w?Se)Rj;l%>R<^)>XhEq0&(VFNn}9|HMl4 z3m>{Ad>|!Bq9p<$jfMJ(lf)nn-$*_9%P8)27jW#wz`~C!l!)8vw9Amda3%Y`R9<7I zSW3thNKJOMQo0lRp-#w((gRPp1_?054MqC>#^|7P8l-F?(so6GfA`fVwu3vK#!zr9 zl!#9m^ut_3jaiKdWrwi4$GpkB_6{lQS(Q~cXW!;; z!mqXdXizA?6#8m}7?8im)CS3M1dhPYIdJA#k|%=Dj91amtt*{lFVWHaI9drZ*{t@# zToZgCx}vNQG@pzVH+)M0%m{E^@EWAXT@M}A;i1E->={xkttyl(0}ZHn@Lh_Sn+^H+ z12W9A$wsW9Mq!RA&ZZ~VI|!Io$q_iR)eYdtSAo%=b#Ju-qv=A%GU5oJNkY)3PLW^E zeGIdU^Gl)0vVxhQgH3Ars#_lX^R9s6WL_af)~2Ld`E&XkDTM?9?Z|}!W?Q4N&$p)jAY$nB;652E`c4sz zGRT4$ImNQz*C2rS+I+fRyp%4@Wv&3W+uey1T?ys8Q2ETOxmmTf3B@s;O_w;svuSd( ze4<%>R}XuZ2c(06R%sbGtqGA2s{9BDja~8|@$K_ThrH5^{%}rXNG=u=?{`kK0yuK% zs_fgSC!VJ_fRm5`b5aVjn*%saM8|naa+fbgNe`A_4T5}hf-o_2ZZ=MnVct9#h5?ox z;-3zwnNHCBwA~$@B>NlKUsoy~&|=&CFQJY8OK8E#x1o%*)gFSZPww$j%KbK%16zO6 z2|R5Q&3Te%Dwqb+91y)eJ8iDfg$IoOfpwK9?$BP`x|VbU`dp#)%q^_)-0@yq?H#~} z({_h&gwZiC^ztq9lLDuvU(c+q%Xk6U3+pkt2pb{8rw{xCInxC5hgL!Dx ziR?ziPOU=$XC~~$Q9`6nRM3ey6z8)x$>|S&1c>uZ&$1a&TViK*AX*E{;zvH zcS=2eU2DcG>hj@;FNv=A$eRgt_Y!B@2ex`-vhKK$!Y32q;{?q28#rxk0^4ib6$}1J z^>0ldw)$t@?$}eZwy5zTwS9-mJ7ZKK65&x1fD}{$8QG?+5vMO7@=__ZegmS8?DpZ% ziKV0`o%E-%yqxKy3m_T(6IGnQI#H1lo|_K^B)_8GxGwx;FW8F~$Z>`hemj1mvoBzJ0&hR27oM`ZEa>G4Rk_gQk>H83eq%Hbu4k!nN)EQ>XLe zyL_22OH~ARA zH+?v7ka@cwFrKWw>VPQGs1Z3Iw}pG?l9m=ox<)>pB*0BnJ6)(6-B~-g>&dop&lsS{ zi26^CQ^6TX1R^DXSLYE!FF)xdd9bnMjR*j|0m4D7R?MNp`s;B7!F?d#s$X?$&1joF 
zI|eZ5XYNS`eMpdN!|B3o2QR|6`Rl$20F4OmzXTs`ZWW>9gG;_XLBMJ%){BqGUkj1S zC3Lj>7`%@U@hgLRkHzyM0!RR31z1@9mC%R3JudMvMKQF&Qr#$#phoU{!>IV=#yk8B zALT2(%>{d@g0zTC7v0+{Y?pu2#5Fb#?-LK5nBNmuk(d!Crj;p00`L=)(0D-WzR6-L zW~C7TD8m6DE{cVGWPatt3bWnnfHiAuG6kooL9g6#Pycd(6J){J8 zTv;#a=-$@`7uW~4O^AKlip~He&Q$Fuhpo8$$5*Gwq4KA`(+028>wZKP-$|C2`2@tg zX}yrkyVU_CrClm|u-lJ)A%3#ZY5!MGiQkh&I|mvDO3b_F)zyfulD+wLy**hE`n*+; z*PT;x+^oLK8ULj$5whC%h95~beaWOaDNJQ*zEuvBMZsC4CIM??P|DyJoC?k0YN-J&!{|p~dt-%h>Nhh0-K} z`L%guZAgGz+eA=jXKQzA67Uks20vhk7MM#SiJ4UmgBp8^kg|bDBf*yp^X#neSQEZw zoZ@8x^Nh(i83Xh1l7BYsPUlFHmuUqkWU{7kdHke4we{z2AUn-)j_}t^&aaU z?}Ugu=9hrwf=`x#<&-vGP@~2gLd9P``G90xBqyrC16at4DzEXlWG3)x-_c=Hi$wwA z`|}#Loa;gLbyd<7Jt$-G365FeiYWfxCcOR>Bk05F?;K+dJtrf8!{P?~DL;dZhvcU7 z>o-G$m}k0U43NmH`aiu$EiMWH_rSQXfv?U&X8@P7CpM4^$&kCIm=# z91?j}{EflyX=0#q@+LuFz)fh)ym}&RQLn;nQ4~g<#P>BDgfqwPt1qi)UF^2=MsK~) zQGaq~t_BVuSJi=?d4Bg$HD&}74bCr$4#{?J)TfT~m@%{*?T|?L+=Nbdya5hlcdK%Z z))U^t2#7Mkx9CboHYqZ3$NOU8RYGd)v=AchCp4_GG=hQmkhZg)m>K=zcWE`x%cyd4 zc}K4n9`?P#*fjb50%DYtO|ny zT+`$+2|pR^ehbAIAL{bk#=QEJ$Ll}D98+*ke0I5_^&5Z_AHC-?B$tI>gYwybTK!P4 zmO4q#Ai_LqGf#y8@XDQs9}X^k3)_(ZVsWy_G-s~vADv6+>8ldoL3)|I65hP#1X*H? 
zxstY_SvngT>pXVxzhF_n=E-vx;%eAb_3Ev@STG;tnQi3yGELToekoX%n$ia}gDUr-|*l}CT7^h1i( z81+OwOnd3x+%31eS(idZ9voH3d*>f#R(dXy?LNf@0MX@Q>!!$qDOL@^2l15~t*k^o zm!YrLf-L8RL#uey#BY*W9$3}+NW@WIY2=nLar)%~n1;_}IUEgzq*o=aG#H=g?X=zR zfsI^Y`yOzZt;m!X+{xDn?a!JsJz+iRyNXY8T^igJRUJYRx+G5U zjxm|TrRbGRqB7{CrglV(zDa>mBbTcJ*%sp+E1j)Z5d_obpWo~|(%e0GPi}iPO;%zk zpV6O{s5S`Z9CMo@=7p~H>t!X!*T+-*_7?^JehkCBA*OeD&x^q>43n( z;o|a%>MKg|88;;Q^QT6EGd7VqI`LatdgRqywim(YnX2(45*1{>PPOj93)9>dq;(Wq zp^dEXzLaR869%|D;N2;jVt!$Ygb=~NpU9~!KcZn__Um~g(R}I4(V44)`T zre3C-jfNF|vmr)&ovxLXwivU~36qZ?VCqXl9code;39q-e8Ipu?h=02ggE7_F|^yUEhnUH!#*Q|jT{DM{h02(8Y#(a?A< zP2VLZ3p~^NzC{$n&0oz9%@pB%O}$|zjXwcYYe{01z?2Amws}dm!QD0qYi%H@1!I>C zmb@w$*ldik4*2xW{A{shCSzdU{kO`M$JSI)8>Mdq0cL#qN#pF7MSi!}&O;D()fIz&>$ z%?)g1#Ft30`$_Y5VR6x&il0WaS%IHIJ|{Sv&#e(Mo31lFTC5&A_D_Fs)sV`~)cNRF zrsCm-gi>>xo%Ls#JSN1NMbz8HAAOXJRuVHag6rgC=kFw$IS&s$%=J=lu1mN%E)NK3 zM&4oRkAv80^vi`KB{#cLf4`ceyz$-_( z<_sHy5cl}#c<(K5aZ)s@qcl<9J}j$w`vT$1A}Zd!b+YI|^g`Xr=rLN(vv>J0&= zYgt=585AX5`CWECpOgV-?`N;gkTdq)sq5#XNeBHF$QwTPQKM%mRoTtC{R@QDsBwik zn+!}xY17(0`Li1t#`|x0w?y-V9+aqkDzM)PAf&**!w>5$9*^Yf*HI!^RmJ{T44M`= zya6w9VP+asJJMfQ1LK^p2yvqKYZrO&l@8$t5BpQYFcR)3;hZqm^~{Oa-1Z|S2vpev zrkOjp?5RI|^QO@=*SRkBy2pt3=7-w?n^RE_gTNTM}U4ByvpXzcR z-n&`ZI-f#ikoGKv87i2u_{fB|&1eM~mo;Q;T7Dxgz}*3}|DZ6Uj!M7g)G_JdpVe)- zB;R+GMFgL?Rw1J-X>LlGkJl<^SWhSxpg;g-s@%Vl5aI={$N&M41GyQO_&pEc{~cSG zv*uVmT8mXr?~b=qriPkVRlNQL2J7bJ^x1)r4R{#3raH)J1BI#w<+ zirsQm87RUm7m3P;>^JNZtacior@E}{kF~7bL&#fpEngX`cSlNJpS&2Rr|70Uc1H*C zh@#}u06F`l!~6MxRiyKXdiII8SY-(Rrj3x}2|a3j_qp02tY6HE#aVj}RA0e`Sm9bY zFOxW}d2_3M7T?%cll1CA93`OiOoKPY{iJh}G+O)n#Xb@{j>>WHW;L{(=qe`|yqfFF zoOtg(KVaD^qF+C$9&Q%RC114!$DzgV(hdl``_+J-0n(svmxnovo(`^7d_;{ZwFt&4 zT7O0sjp`M)CXPHJ8(g>le9V@UuClrH@)6ma!qyKO$CH8|`c_I+ttVF!3U5-Xd@E$$RLL=hk+>RdablvMnUX^?Jbv zI`1@^gywEsMRk9<9dunsDiu(QId`^Qs97IUe=P1Ghksw% zxX!1a)i*_bTMJg=QTV%m#3pNe_kR)f)p1ciZ?uYngmfc~5{rbSG=d-{Ah2|!bax|2 zDc#+V4otby$iSwN24C)CzUckH<)T(7Y<~pD$&b%hQ z#reYRb{F0CJr81Gf?G1ijoNsc{f=myHS-3pGp%ouE{-gvFd0yk@)kB$T3X^Q2srFU 
zqJTI-ea<5S>U@J%^BA%Dh1~sBFO3PX>O~`F1&2M`6t#AHJx}uYn-i!LoL0U*Kl%wE zSatnm0y+Az{X>g0{)&%IUtSCqO&VC}e877ez2&aUleG!M@omZcgS6#as}5yef3)me9e^JI$U|j+*|k)Cg+Ns zeBo}`HMk${qxm8BK7e!xN8RW8FzM#c1=4~?rWF_vY6<};<6)-OYpJJ6^>S+{HnsC8 zoN?H*tg*t4Hx~%~W=J7g&vkv4`87*@R|AprrG4k!hID+QlX~jq+Gn7%)`$927*k3` zH?1-ys+`}lL@Zitfn9%pzxC|VY!`^A%E3{XW4HMDbZnt86K-?r1_jo^cAaM@DNCRu zEWz?S-e4K}qt<7DOK0F~qXhINfX7r<|1J%A%C`C;tLG&*|DL|LbIGFT{$P$;oxb-t0QuPvm1 zSiL0^ei3g5Ww6$U6}|wTjKjw3=wTD?_shz5r<_kd8WtJ&S}09>2>$#hbw`SLt@np7OvJhJ-Mg*Ccv9CFt|ORu7n&4r}OVwTe8gH zo0r1}X6Ow{(>1~`;^p)m054HDO>H`s#Y^oQgco{IHchlvEdw)@v%Lk7K;>WjOe;h&; zfM@t6q410AU)46yBD?PK?b0VBYTl|yE+YZH0Bw6aFNjCyF*dgLrjcTqa3meW|fuSC{=!I{A>YLut^#f zR}AjY*OZ~7BlO?_$87Fb31)A#w$H{JiTc>m{a)oJW2)cm@)79{(>ALhBTbcy?#Ex& ztCuuY`ZyaZ)w;MMw~~`hX!SRtXbr#AZb!2F%fXgESu$;yCni8KBQxQgatW8Ov|B>H zUiLNq`+}(XKv|s>?5?t4gS3_Qc`?jOrv%)enbOI`zwGuX9Stw+KN3NzebP$C_wA&n z4@wO(pykMzD@M@78rC24g3#BOLeZWl*PaI4OiJu8OKppt!{Qyv7W`OD!xN{2I$l^f zXN43)ts0_W4BJv+gAM+DYF5Hue)reR*!3;&NM36xo?h*cNkjARE+RpwuCE7HVykq#x_*XFj`I zoNwMRaGRuBwcfZ>n=!2Pi0to5m_NML0aC3#$j!vq)cy1BsnY^;swmy5;0xcT-eIzH>P2zUi@&IJ_dxu0!A(Cn)cRm|&TI-zNvTm}!p zl6R^xoM}W)haU+m2e3S2JDQL`OaCn~Eksv1(I|ku)%#v_0f(h{(i6+)Us26co67UE zfN7+qp9pW1ZZu`8@dz3dXJtCtVmr=OSF3n5fKS{-BsnN7+J{t_3{Lct6I-K`YAu7U zfo0a#SMD{>g3ia|;WjEX*T;AEm{$U5D`>sxk0m|fKUu1gk0nk7>4M@>6`7dKG|u=5 zOZUo<75*wI>thg-4K)%Zh{`@KB#7yGnR+0Fk^VdHHxarAEz6RmTG|uyS3`+Q-k+EA z8naOZePtJfyZtS;B2Us%&#D!_!jF6*f^u>g=4UG*HQvF_;0tj-17xopUl9L5u^<#L zW!#e_(ES9$&VCgXrvl?Cq1*RwGJa}ZzI^p99z&7tg!lb1v`OqxDGqa{mMVm$F|jmL z?i(q?;6i=?KrU_1`u%2@xR%khU;m^gsbvRdoP{9Mys-pbVy%?uerzHJUqe9J{+~L95R^hLUqs{;AgMIwRL0E2w24wfT!IOl`7Eg=qHW|1N>7r)>8<>C#PLj_*v*O(Vx3X z8i-dw7#fMFmgfm`HuQt5;D7piS;$?Dw**S=>(qDsQ@1>!Hp3=yt@^p0Psf8VRF>^V z9lR+ME5kO;$^(+6L5AX~us79av?YyO0#@YOMQ=uLwqh7cA+DE;HCw4~l^;khOV@z|5%V;zb`sQrOdvDeT zV8+>>=>Sr(K%<=abLxMp5OOR)N#;P}N$F>9RxGOFt!pz|D#PTcX zG=u~#cZLSNQ@M7?&U;ua&bW$7NIGX7nINQm=TmDY@7{T(5iNOj`GMA3=JtX-}D13csHAr?T<0ZeW>$fMw^Dzn9!@pldYlx-%1z#=Pf! 
zS+jx(m2!{&#GQEH&ZSqiDo~QIdhx`QH@DPC+$`#G=cw_e}Wk4svUj0_y{# zTmR^HB^w463VBuft4x{`EEthN96?&Eu<=UU+b$S}3;5j^+SI;M$xWU9ib+@hvH1ZI zT@sNh5-%zux0>uoEsTw`-4mLubUH>h00sy9EB>AXN9nRlrm<=r`>`~TOSkB)Ipvn%6M3z zOm>?Yb500YZOVLqS|tsM9TpFGVm~Zp#SHfRRj%;e-Xp|cZ_r@3yT^#g6zfe}fp$!Q zHj~%F7Z?rp!mqgQ&Yg%~bW%z+3-lY8w4NaH=#EpGLE$XF&%BOQB`lry?9VU;aogu@ zqP7ojo3G}q`;J4es-OCb{plIFDZZoKM~%0sPvmP3W{;a2PWs%(m5A#j@;#nf5$jw2 z2-C!qogh=U0h}P*K_NuWrRgckSMBxYych)eP98>yc?08HPO(%=p?v$}LDleKEA7I+ zTw~W_=;w5eh5nTF?`3fq{QFg^w#m?r}d8 zK~{4)^U|VL1CT0m0#IYW+WM(HpLZcyzWz34{c2PUNVbJ_V>lmT1_23J8@4ExGJAJg z-Y+TVK%G4B2+;^MM{BCS> z@3uVq01q3re_F|~GH-SN8`){0k6abtA*WcIA71xs1YMmGh`SrhuM^`_83d3=E&Dt{ zr>qW-r;8NA##4T>GiKw&#S|GlZLn2_RXtAj;QUKj|h@fj~?-ZdN`+?;pH=OKD>mvripG2F6kpRfiYE0GgR3SR( z_X0+lh%u5GpJ?>)0LayiuwEI8w1ty4v{jI&%y04emO835ld96X@Za_y(3u#mlgCki zhaBR6(DXLxYCEaFl@3OOWYscH`P5 zn(x)x=BaVJhPQsTyFFe%guQKs8wI>0Rx0YIX?`@M<0TqG$VfU=B&O-^JcDwY&<4;P zb|+Nam>&abw#d_Fr$m~?qVd%va>SIS0FiXS!F>ap^er7#ujuJ7CV#@BN-eTcD$+v% zvkyJ?nBwUsYs1mV9CTY6C?Xf1!UhGBz6A$MzoW9&3=C4}9EnCg z&H)hBya62GNnkut%QvvQlbrEd+v8S7ss8!GzpO3Cve3U-ZW^f%v_k< z-n)*VhdZ-<_u?+*&gQ&Ib(nW|FuuvkIA+|<1KaN4r<_O)smVI$hnKtq+kiXpwE=sUhR{ICS ztg%F_k)&2ZV|Al%KEef-3*<@wLJcau;LrE~@7W0dG}EOXF@H9`_0dP0n$PQql0}=E z!8=_xA+RG_(K;XD52OQ&h2LQiGZJAJwOv_Y*Y&dK za=tEoDUNO6gv{4gR;XBVN3=0bBcEr8!QZg(Rj;DnZx^70aTZc0knZgY>wNbhtLm3p zgL&&11R+6C19eoLu?B!6$Thu7MOdNB38D6E>(OK6O5cnv=5palQb*qPZ(G|c_(X=r zBr{Q|$vqSlc*UImj1i%B+#0V=0^-ZdO4>+z6BXHhDF6d{q?ZVcu^H}@FDnL6Q=!no zL&dz3)E5WxUb7FhA6p6y%X(h{i?NCBFfmvK*2Gf1{D{ECaZTvrCkLEgq)KExnJL?6%u2R+UsI9ovcg`6W}MbY8|kb^)i5SZy>K_7aGqNiMdKNOnGi^EvhU7-SjcH znl>S1(W8EG*=Yg1F#9uk;ZHKqvWc|tR>X z!&B8xRlp>e)XTafO>#Y+ueI*h;Lc|;Em^*C_`8Omr{aX?-F#&c4mjzy8K4h-_f?;Zwpr2r1L4cP^EoD;8} zkaId-yP$k9>i_q=nrzOY^9;YM=GZU>=<;c&hjtBh)JZD44Bvpc<7^M1@!3YAg`9IN zi?+Ef_W#CzR_~VwW9@u3EQE5LdGofuOBMik|1F~wVJ~-H3&-nu#~+A=6gkaQo>8@E ziQ_Y`{Gnv-5%<$EZgjGpXLtd*w<88_`}&$ljb%;nI_y1D0jY?Z*8+cLA`0Cu|vuIk&$s_#rB-X&B 
z9V2`NO*vEVxzzLaBsU7ds=>E!$DKiSv;e$7-%Kr7G4WqD8LP5MZw$5cu)0Plte^e2Cennup=Y6QeS#1x!V)xmMab^A zM7P}S4r)TFh`%|_vi`;T7$z@}m_uL0_<``BYahrta{u>$EZb`p)6c{3On~XM0U}|Seg&o5h&f|RdH#?V z;ke>Ng?_l@RW(K0yfS48`j4_b&myrWuM=}@1!3sR_GCqecmFsos8WjQX=`l5`Z2b4 zi##>`wG9`?`j!Q#<17de4|yV<9wop9oVKT+>N_B?>CZOANm4}3xyGsbjsn``$-ajp z;&6tBcrBhYW^X{zn;m?wCXK3SI9H?#!iYFRpbVm2bD4YpJip6Tq}6Ca=h*1CAXSDj zz_C|)tL&?095cQOvM0$^!ymYsjHjCVeNXIF6eo6on@KQ{+ZC_Z5~Xy!PrB#&cS8CS zsyHFt%bRnNb29m1bOGu3Z%S!(Yk_8v;zpsH2&8ZiIya(6s`*@3;Q|e@CE98(-m3_IkP3uBjgx-MjQ^I~^{ZUZVT)OGVKgGz+Ku8)S@er6fIfw~hQQ+XwIpUV`YoR1iDuM-JGi z@0NNTS#l87hz|odpTJnAJ^9qqjW#K=|F@fHM3!C_Il+W$qtN_jp2Q#2wKw(#x$zzp zC>3#Q8=R*!#vsg?PSk+CG@iN14pkv7bw>^&F6 zi2r9?^8IhPJSY<<{m<;HnTa=`*8X4n4Qe2N!?C1+D^4_Gh(S07wZ1oNq+7sFm(%T{ zyO{QJr4Eq4)sSGSNz-H5R6k{@Z}`vgUEdyO7iE+^`#tkrNuIM=b_W#F5Yw9e^?(GN ztLG=gGEI9Tt0lIZ?B_IKd~W~N4VxdKIl7|e(gL4tG87ep2nI1gdA-JNHADp_$q4wR z?4MKnZi#IDzZw*PWkmRNguhnfTpKQ$P>r=qMw^AL?$eKeH?zEQ_Tlz1e2h7i&y(Z3 z(tgIJ^LCuT;3)X0^)m<5MjkuT(jy*{2Ujz%NoMXDH2R77h=xTdMS%aMa0c?6Ji(_q z3&(5`ay3&y&Y?sFgb@Xy)=`xsAy0ID(}>ERJ1BOy5*p>@R{>TYVtOFGUVU~n^{+7m z@k~(TNYnjLXZVuOV;wOShD(6YfC4eOaMF7w_-_~Hv>JLpu)py2oU$<1t~|{J1U|jD z+H8nJ!Pb_&AVdUqV)o|rYJ9-IUb_-%X?^N2Q$<9^BwCmYq8EgJLM6Snu>FPgIww^L zK22CS&kPV6>ECT>%p@{F!{zj(H}{yP9B~~kKjTTLoYi|ty%}91#xlxR(ct*2R)5U2 z0aWJJQ$B5xwftpYQ2Q@b^=IQB&13uzm!(L`{mYeO5&P$oE2s0VThAxWN>Ss`ba94v zaGcj6NoPMk;6j_swXQ?Xu!U3+alesIhtocZmy$@Xb;fExZmc*M)3^G)g3imaQ_#d` z2Exh;Pc1H&uf06UedNMd?{R@0v?K+%-+ys(8T#!X;~AN&f25yCN7U+GtK$tcLV`eU z{JPv5RV}oEJzt~rkS+xf9ilhyVs4B0(QsKxNrD1*^IlKk3cD~l@)pUoNHrzptZ$PXKg=tseVa;ydwex8uI4|d<83;ob~}O zuSDa@q!y?SRsIQd?|XT(MS}XAaawLIJU~IAj-zviTCMJTOCpeQFQ;DGgS0_SYp7J< z@>WFAz+@T)nva@BZn;3kt^5E zr~PJmkVynhDOi-t8~_(dReXw0dvae`p)ZdSK+zlF z1k!uy9}-uC-^zs%$IQl=vD2No{g)i_?6pW0x!>W;$geSOj&Bi4VjLE11<~3J8zRd{ z;z1MZuAi#O_Wp6zkF-=S5}gh;Ma>pU|=)N4G*4G{@CsQO@Az71h+>P%4> z<2d7QOVRI`6hue)Rv}Y0GtX0*Rw*_Jn>Ku<=57^alZ(imV`880OGW*0rnTfN0L4h{ 
zEvTs=e#|WFiL}b{O4!qTIXd2EIc~At0)Sj=k9bXbQ?{mXAPP5~nL1vWwNyWfRE5RiFg2vgUu3g7SID=xe>vET2wY zC$M2-TKOl`J)8&#yy$xJTf+YZLU4>hg>YLYb6pO@1cGnK?CGN#ZBdJdfK?p!fn;Ok z#BOB@UuJNL`gPT0L5^U8wUD^%7_lg!e?<=Z44)gf=HNO{`&;nT{Z7UqtQ?-P(We|6 zW-^j>fsV#Jd~9>C+Ydvy)G*UylHz0$lY{|~T*)1P3&M{mv^sq6& zK-`|DC5POEv*UUVI%t*iNS6p1{qJEd8>`iXjx)x6+S4RQOrTCMg=G+B9Kdc1(UG7W z=v+}A9tp=1N%{2;l|?4g6UcCjwE!p0BZo!Dfqt;SZ)K=NeH^hKLL6nl4dsVsf z(~EUcO1=KOQTkZZpuS;iK{yeaM-;vIp! zGWv7$CHz;sQHxD(HITE1gk6uwbANd;&@R!b>c6^3WEty3#|=}uEv22q(#u~r@_C;Vl_(qswg^Z_+1sC()~O?S!tYaC+7D!Eu(Xo4 z18hOwiKg}*BpH!c+kxQt`@0a;tVHe-`QiUn0g}rQlU5=`{g%JC`~X2f`05sd6SHHg#L+bkLu0ft;9^dL zOUmdy$zch7VHlO4Q*RP#gTh5dWeD>d351EefQu&nDzq)JalFJ2Jm{CPM?fOF94K(= z=YZn@m|nL^K@IbQ^ROwE-jVeYD}%OsXCg*2R&w5Q3)k z?O7>wSQAJE8Ms1!If>MX8O@Sw0@i|~_=@=8HE?JSuaujv=DVf0w7)YU75cGPDh3o= zwyWuwx_E-`(CxZ&S^FYwbOU)hf2LA&e*>DEOUz-+@3O$bU6y_T@(fWHE&wqqyE7LDJOLF7GL~`s zKOu7~*PP}~-Q+F|e)UB6jFVp4j4Vv+VC^MY!P-~94P7b$+xX3dyfWY;sbA$+D+}~& zgbiW-;8v2vet5uoTnJ=^9*F<_R-t&hn=YF-!T zw7f>kLC$?@2{cJF^0f-1+6z_^hx$6@V|EJp0+e5=S~tbR9?eRF6Mt!MI_^@_O5Nal z19n9!6uGT=wc<7-!J@L<+9nK3vv6V6R{zPYqBd;n_0{uv;s_Qf^^&n8MqJyF0}Emk zf}MO)W_|@m84magY(D^&Z1$K5y$F5?e8}mSI1c@7>kT@Gs3-_daDtkR{7}zt6iy2> zdk$ZK%pV68EChda`*FRF22L;q6o8t@*qYro8cnM_`DCUv@vwEh&;x+1-M4peY4QU_ zCF)Snu!+Od00l)97>cIU!s7DnrF^t_6tC$BBRY8NolswPL)B$}k>oo}skan;DJY zf)d(ey0M@Thp(4R(!|1^!xbievgL>0nep@O^Z;Ujq!~CrQE3ny{}?ooXNn5^#2I&D zd!Vxb8Bpnxx0vjEAnxE_0q$%K;l%@S@PZbI)}L?x%z)vU1LY@hPLGgl0J}z$_815- zg<8NxUDKoJcJ(v?PMj}O$lZbOFH(7a^`)REcRYmwEQ%`B_mJz=YSnqbf(-!n2361q zKm5{1W9sNi!3#yQB>K1}*k|l>Y=qiMua=sXY-Yq~h?U$VJ6LY&>;UAh7mToq37Eps| z3;u-M22V4&5~+ATvgQ00MrkesQo(?9cGqjlq*_IBLVuu{f=XN_0wmB=p>;(}S~%`vH5(4Vg=ZpXssHc!s2xQ94)$7>S!yK8 z0uxc78i6WgFm@kOjY@#{X%sM0bxq}CX?a%f@>Yq=F9c~%pB|UIe7K?oeglY{`U3(I zGe^gpTLjb=`z*(<;_&}=5viB4JrHEY0h-3n6U z0Oy+=jzj}Wu-;r`(g0aiQJwgXhmkS}=SuNvI$f!LgoqyZ%zpmazzU{Ib@SNxeD0 z#{V_SPN!7w9fl31Gh4P+GsrVW#q$gHlqWi?XUa|x^UQkkfQSA6`rPee)IBFrKoeZNxFk3J_(7NQ0 zF-0|DUG>!m|JX^pYKJl(o$1xu560UWK$479Oz7L)WT#*^}h!e2%M_y 
z;SAsHb1Xm||F~?z^vjoLt;hPz&IR$RKpzxfP^6p^t3+JWOz<;0p|INr0y#J7X}fx0 zRn3(Qfr=#i4`9+h_v&{Q9J$u89S0OV9%wz|E)rb|1n2NzdipRi1tv zn+vjCP^~qB$MlPr2Hf(~yaY^>w5H>)-eL%+*afts*-1hH)`WkLzn;RPIzJ=|N`UAB z6v&eq&Gy|I*Jw~gPEnxZ8<`fv`*%vG&4rPwG1qyBPEIG<&HUo6iu~aYc2A<)GiZJX z&}%b@%|-VYz*hcISeLiC#F3Y3$=$Gzt1B4m!Yk%}brz+s+)4*qv4L*}E2)jZqM1m^<6 ze6@kOnDl2f+~<~a;=vl?A7^sxkLm#OS2kCG!0p(n!W_%XTIy3e7%CX+ej(B`w@u9w z3=`s4uxZbL)SJ4=Ew6F=&6gwqO$}MwE}UP#`%;X4wQ=iO^=8m=Fi*i1i>(N0dGc<} z_{k5s6UEXVzhs@YxwAq0&2bu)>A2X>f5#VKLseTJlz^0_2Sj)$l&+&pwK^cWp^N0BKLIdwm*e?{kNX7*QTDQ&W+| zmDm2|Gs(Fi`v*-gapD-_dz3*K-nuOIGdGoEE8iF<*vFz+z*NB<6K|aZ^za9>-r=eH z7{~GKtPkImg4PreVX&v|K)92*ZM<^PugiPo2<0~B{{{6m?_Fg*FuPN9U!81jg`~GL z0C^u4;fkwFS{-*O02bqp4_q`IfC98`&dK2M$B=oxsK}k7_rW`QE*ibJI|v^Azv;biM|Xg8 zS2AlIW9ux%Y87bH=t}3iUp))rl+|Vk4(QETT&>V^V&bR_$px(h7t#QBzfrHj%96_= z?THXlBMasj#`k}eOqs9W(O!23DL44F7@W)+ur5oa~~0Gk@GBI%$Q^& z;J=;};eZ#O4n+BllQ+k-|K+t!115r5Unvy%B-Lnt-C8!VjDFw8r>b5M8^ zp>=2VnWY|0!wa$VMH8S8$*?a@?goJJLh(To3%NINaLeR>8{|34DhMPjex6!EMRcqB zDEkydBm|(rQa)v|a{l{zH|Qlua_zv$5nR;zmZ}5% zF4VfJ`4>&3ijD9E=mEsWANzN2_6ST>31~k7R1%+yISUk7Vw}L zwx1$QYUk|nua#0A#5UJtZ#kBGBH&EpTvt8BylsPF1pK*N}H1 z@kX2)jk4NW?UK7>fe#lkcSXXt=_b>q+n7>NJ&EE8a5)(;i-2jTYFSYUW`aYdTgyPt zJSXb%Nfvm6PjL`iK`FVkdcpbax{pfv%jEWt3umqIaZT-o^yOc?0lX<&kTwMG;NPJZ z+pO=>Ucsh*gx#>29zOjBd{P?3_VefU(gm;kIcrNBUe>ju7SF0z?_d0h`^_y40A$kv z)`p^=3+jMuW}Htf44lh=w$}=vkk%`}+hyVUznaQ+TJCW(FUGERkc3AQ?xoePU+Da9 z`SXn~2y4`;e`D?{pyW9acnQDt9E%#`6$q^yb<~a|1)IGXxX82cIddnGe{BloYgn%f z=bC{5P_e8V-98Y0xlR`FJgq`aU?J=WBCTJA|E9@?vlWh;JDp)=0ldr>2B#{U*b@ai zzeA{($H0Ts4y{E_Z3a8(lJAXE*OK&*uwqJkV=OfLQ}OID{l5Pg%KmD2vn`_eby zG(pL-X^J-3RZzWRuG-fKA?PhtosRhby7D@W(x=z^DbFbI8DzqI+1P= z4)uSN-@IG;a}qS9{vbRO3pt!`OyB}VKP`7TWf6pt1^&Er+7KDm1v+{*irfK>K3JBZ zI>HJ*Z*B!BZPHLS5?}o)c+695&DzfvDx;id*xEl4z|40Q-yoH6>;3C$J*l{J0L$GJ|1t?{O zeJ1ps-zVQiYBAe?fEK8c;2!zS(K|m8)^pO}_@Z$+!;GyW;*|)ld2Dn2KogHuWUrcc4e11T=rnpm zKT-J1lnGr|_UTywuRtSeOdd3AiCSk}>KTMY%zjoXHIOP8(fadAUQWK?A$J3T=F~tA 
zaZJ>!uN;r+qAm0O3V7m+ypM53s(Kvr`#N3%Pt`J{IU#ey2`?1^x?MwkyY<<1f8Cn` z&Y`&Yx%bkyG$c6^!$m}A-dsbuvOa)K{8@DYlZ@!wRE$Su+J~}5w?wgr$J8JUClspv z>mw8)2SoC2?{X;&)aY6HK~JsDmE?JH+hwekU|Ic_@AGZudLTLlowDE<;#QrPExmAH zVtp5faXHtG0cX#1QMT`X7|mc_>d6`keg`)F3BV9izjCT&P`jL~+H8S-u#%ZLouwZt zPR5+kL$hq#oMzoC0|j(O`}rpP4(!#w+R#?h!4``o6zYr28Q1pX;jv3qLf7>C&;Ak% z2v>fRSkl=o8iVt=RM+xHIpFGYrbLZfS7qM(8g|>Ei;!5N4T`&eBKq}Kg=Q%A;kV^m zgFzvEp7)6#zsDmx3u3DH256&Yvn?@PT+9BHj7qr&4KqKZ;LAJW~KF`L&&i$_lwxcJ5`#+z`a~G~nog6kdR8qJfj=hf$ zx3Ha0$JvCy&_^vh%Ri?=pwajmWQFc&@sS=m{8q%Cvt|P(LM20MUbk)d*Q}bI7;wef z`IbKvK6iuPwv6mV35)I7mPzZNd@Q1|{Xa|?x zJ4WA(Dy6jE1rEn2(QmHy=i<}(3=vb}#s zspHqd2OW`xak4V91DgUZSEQsOKZ<>k9)z4zJ{*HnfALj4O!deQa=RDNLg#X88ECvXDpO3R$V1#!l`ydHW2<73VR3(k)nDbpJ)U zlELa-I{dc-P@_|>O>#_frgUK&q-BPx3;K9}EzKQ#5Z8eXIp(!Y#1&>vC3tF-y@1}w z-4>)5Z!01{BRToV()EFpfh4ACY9wapaivngynsjZ?Y9=)0_@9jqxBx%X1%oYi}Vj? zYzZSfjtfY7oF;CT+@$MpvGCT*v0!C6Arrg{(J`-@rc893raRWU)eJ%Jw_dM74&eoa zrZ~8hdM?E2s_eO@vIZ&E^3gLUwqa8b8xzUObNhpdPHaAHP@^P+Pwo%3e|T?qdE&Mt z(?vIc=ua}}W;1~E#w4bYz;?;OxLyTWt7SMB`2d*>KeI1zw-s)qBwUlT#|M#c;ewL_ zM>>T^P}}}<{D{P=ke*t_VF|G z7y8-F9Rl9`3+X+3=;AVV+z#w4$Wx2W|E{kJ>GIV?vA z^Eo(Y-1|3;3S<4JpsgGB{^WD}B)#E3>LG~+G-H;f!|sB7O!w-V85u{x!aF=1YRE{i z9=Qc$4$C9LdM2D87icSBoba;Cm0eg?)!Fah=E8kWgVg>hr$2)KP2AO9jc6yoZFAqj zy{?*V*8=qn6N;hf8HoG84>tmh_>*@-Q$DankLn=4GTYvh&7*GJ+G@tjAo6DM49M41 zn>eiq8V2(0R?vOVvcG?@shF z1Mjg)E6p1_nA{iRQO|kO99#;{jujy&P6av5hoh_3cx$2GX~a`TgTn>$lK1e)IHOx{ z)0fIkN%L-Ua_GUGvT*8pII+og>P&8W0t5|u$lrIqXfVi1A2YR(nSx~DDna;M`lkN; z?!#|z!}8<$MAh zF^3$Fm8&5*`8J_O+k7#|0@ZUxa~SaQb2-{63);05myb*W=+EOBOGb|A%r?B0HOUvk z_1!H8TROjtb@Anr;SXjk42VecI=ldK(m+j3aAHxBJQ|_6F!>u-R|)xB;X1079akz` zY<#BZl&LnV1%g9#P(LtkMX$_@+ARH*t}zQ8r51i2+d3d6^sxFckrN^GSmhl-yq@~A z>6?JFkb7Kdv-B$_%{bS0gw{5DXJtz9zLxg~HJ7H-GZ~X#JgZh_T2RukE?c6WVH4kn zj#o`!uf4wJ|ApaY+4{M$%F}&s=|1qbCj5P^%N^eJ4-X(5zyFH=-^AU2>a`wA1duKO zzsmgV{p!VkXYpQ7t7XThu&=>R8LpkC=>4IumcMD!N6B_zn&xxrT%kB2YS56d0faNgdd!G@MfoL&SJdL^2rAtMsqK~iJy~qb*r+Ib4X!J! 
zPD%;7D60<|MS#dX(EN1H8G`e#*6=rt0KxJ1bQplC-7Z?^pLp~s zsAc=JX8Hs3de7|z%M@oo@BZO=cNW2~V8Wgzs9!}w%z#zpY&^{?ofURrdSCFN z{w{A(%lmpXLxsYJej-Q25!@qhfAMBxz?K3AH5iLsjCa_{6FIQ2)`b4;exN4~)ytQ! z68s}fym+wMg8a|vddlZvVRX1V{~@mIy)K22p?d4j{cIrl>LLq_rr3UMbzN$(3j&M# zUMt91Gn)i8I}VHZkXm-xcP5!KYO>fvtcymU&dxO-YXsESYd?iSaCoS^eybSMcf@Sv z|7O)yM4pPL^YmT5>~N1SyBa+xYyR7_g&voUUQ77}F|&wvg)f^c`w|37MSd92CdoYW zJaDJW7SgSg_nOT&c;nTYqFPl?j0M#@!c_fB!~T?ITBRbfvHJBtq)K((Cg1|=huZ=P z!hD5{XcwdXOGsJl=*}4%@?@(0k#pgK_A z(c%-W)-9JRPt_0v#OiSJy+v}B-3&4Ei_%DN`nGuXHpGx^J#W7$UE#@B7*rsh@e~iiJQ+) z4h&@*{f;Wf*4x|PWMF?J#M4!~wsp5uxVC+us1=IM^a=A*@ttp)*87^VHXZP?p1VA3 z9R7EBDqnzYN%+mU>>O2k#{@=z7%a^p!~-;{BZwSMkKzGYgznhh1Kj? zhSbmTn;RegljFN=uU%#9^uwx9*I}xre@Xf3;1&{^(_i19s6i~F^Z%_TkVEvBwkYL{ zOX>ms73^IUFt*JEx;3HY^#|vfE74@{OLQe)@!g=2Qum-@mEhG7P(T$Lp8N3Z~Qt5PU}001pX*Z0*c93vD?W@R%0?toPa8Q8_;017Ip+%vjDfi{yE8!88#cZ zP^J%;jLQ1pRxgC1k!;%IfvzWCM=>6`KGr( zCEhk4j{m1<%{a|cA9AVpZN~O|H&-)$o9b|Io0D<}8yjky+`lUr#GJPz+aa?h5vfUt zchYYIB^9+kT{@dOFuk+h&tko@Qw~*Q_`Gjg*D~f|$AF<`LGP&HDPQ2T&g;GqPvrAP z_jcy=vt8GH_<}I2)z;XxK@9Vd(4>#tM|fmx{7v2h6t$-)S+g5l)*1?;68=drA0Rt* z@@8d>2W$s>5n6k`w$$p}h(BS`@|y=`2j_Ob|2$MhuX*rpf!5r&P=wz0OU#|5v9*pvYs2I-khTA+bkpt;Rj5V}15S;g_PHjO{ zw+y0c+2AI@*ujk3vX;_V*1tJ;U%ILnlGf^$1c9Kq_Oh9IIM`y(4D(KDfonrmJMJI& zDdJYw^j!HUm=3vl;<7|GgGp+rM(OzVJYZw^n4Qn_qdsnb@;BcFhHIgyt!L@6- z_$nulHyT{}2X4;=$Jc1_InClHlVDxqV=@YhDb=OpeBT$XJB0nJT4kU8vgv%DZ z82>k}$QO#p#CE@w_%{Y{kp4R(w!&CDmx_pktwV^q z^M9E7>bR#~<`ilsVjtXm1__`17pjtDzwS*=rHWV(&ow$QqvQCvlcd;z+J#z* zuZCNDRBWkQDVPtEo&Ouo@v0@?jg{)lGSYvez35!y5#SeQtNwR_fP6*&m;+4D)b?`N z?sFnE%ZOfz=1d<5j=p($p72)OGBCC;EtmMP^j-?5KQvgCrz$keBam!c;zI-|ESUBc z0|5{&0M`3Z%d~uupP=2aGsSHP&`nt{=vn2%8R&UJylh35S~(3R?hr~6R2 zkjc4mAVfXM!rrdL119PGy=x>GMU715`rlI#szLaRiJ_7qJK&i*?uVn1_0cD z!jZegEaMXwMa%zWq5maVDof90q<_CYT-VQA6nEznrIUZh0UKG=K6IBQF;!|2HsS`b z&INn|M?hbt6Vt)$U3bdmzi9z*;D)qDyRjZZC62w+j%ucpN5Fcj^EP0`S>FWX9 zKNkB-=av#^*e#z+HKhGM1Bm6@=qMJj+o4hc%Z>;K2!YQROfFVf!O5f 
z<|t67V#aAO3BjD5PNUSy1Jy5LUZ6fr|HeIL2|fE!;a$<@ctt%jSc+ayCUkNbE_-R> z02I!W%K85rd*RVRt@hjmItyU&4p0UFyKF!##HN(Ns>Dy7C4Y`pAnH!Vm&tNS4mjc( zKVT^YfMyK%lt(M7>%w%P9!!&R84ff8uwd9WMafxyW_*#A*8^D!=vM`69@O^Gmk*9t zJP(DyGPFBsY*5jyRc!jXFiDL;T_A#1b!}~u?w&%Rg8(Bn*7`)c1S%c?&KFe4TZ8Z7 ze09g#Qr`d;2mtp6dKE*BPN?hwJir5@iVaq^$b*|3X>4edaH0hOW&kM~Q{78d6_DZ1 zPS)EftDd$(^7zO(4TSe!D|0Cp9x6LueM=d%7{IOouZ_^`c8g3@kwEOK7yjQuq|z3c za_ja}3psc?tqblL=5NFS}DuiiXD?@436p;N$Fy~6Y#jFzREwx)Wz)+r&<&opB`iq_oU5YY< zlD(j+SM#kNw|PE2lPVQscX0o%1`mq5^z7i`uSRht7`Yq)1kX)=)qX8? zePjIe%M#Effp}I0l_;y&$mSxlT=Y!`!d#SjwxiHq-!}y*?P!?`X82XM}Zoer5m= z=v}+U>ZvUwMlwxlKC6x)s;Z>=`$50DJU;_xrLk$-tnh+L(9&`lNvkwx?mZ&ObI(7a}N8|9~8p@_>)4bH}Ag^Qc%CS$;UqXI>7emn!C;R{|tFkP!IEcLEIR#jS$;#0hqG+n_2HU#+SzZFwO1gKeAL z;K6S^ zrl@;i1D-W|iCS;xOyc}MCbHSzg+Y>~IgatOjA`iJdG!n zAX517uixv{r=he7wk;ux9PmROp!E%!tmQcO1_w)@r}EHW6qCmmJJv*-3TCiJd@n_y zRZesde)UdC%;~p)UJ~`k&~NFz`BcvC`_eDY?WJJ5tD4uY0aPm{0uOFlHQ+8eds0;Z zEuIHuN+{Y5quzhZJ*Nv-}n{I)0QYH-2ttnkQN+9zNZK($j051?@$TZX6h7b2u5BsKJZ@pxC;|sLa z>Zl#x8FcOktsMw`?(f$zSXWAQ_>fio!4O2$K4^!zGOgDFhMqpp5$EAk2(5~FZ5EB2 zKC}u2+M%G(hU5HQ*roEfe*AX3DE;W&ma*#lStGJZcPd#>R+y+&W&G5W4wyuORPTu% zx_FE;skTQ>YiO}-(;2d)>AhIFLPeKAk!W)-n0UC0MT^HHU^Y0cDvwJ~KKXIK4uZ|> zyiqgifV>67x>8^}fty?GWI3`59y4iX1E9`Q3`{YmoJr2rb;Eg7e1CXW`D=6(Sx4lj zTfj4x2?G>O@CRY5&ftR!5o|sYii1j!_9VN+oo|OO}R+PZAQ-y z;@be_tZhm1%7jk&&uuHAM-sLyK3nzL%0mtTNg5r8@{_WU{lnx7^ZC8;`(;pEL{to=2A<8>elMDR|l< z=`3pS!A*~uEm1aWHH|D8SK_34b&TJM6kvmyIQ|+3b-3g>!GW=WcXqZQ@4!So2gpY1 znIuAz0bh+yF)-J;?U_SukokPOVJhGCCn^Ho(_(IyPH^Tg{q|kv@2Qj9;20V@46*wW zHO6!V6!&VxMgyZ3e(ejvv|oWVQ4l+|L+~9 z2+tXxy730qj!kPXcytUS+8xeEVy=ulBXy$}OLchms1wwckjhS!uSohk*>QO}s8*yV zHDT!~<-*bpAj$*cCg~kLu_vd69>?Xp1!vO%gc#3E8>bqtS>3F-)^CCfMkGkaD~tin zAa;#IgdmFJDdNGS=!bI?Kyi`P&YtysGv}68Z)=tooCMl+=Pps5+jy`*7vxyat2ebp zjb-+yLU|z62Z^w~q_Lr-O9|gn>K?b~yx9^7b~dt@gF~ebLJvQ&rpkVrRl4!eEC2a% z^zYbxW=i8@QU!i@udEs~U>a#*k9p2ChSKXhf`_4wXK 
zDZdkuA)c4#PURDZC|$w0d1*PW(igyv@`hV01$NA+uZaBIq-WVnz@?WRvXS8GQGLPu>uzVIyppmI2skR<}$;$J)fDhW;b7mt~Gcaw~YrS&7N9TVKiG9Et{f z{(GZE`#PnT%mE*JfoMz{!;!8mRz^VehYj{nAlBx$l&yFxW8e#3=p-lr1p8a3%0Rs( z{F@9pebkhE3ITKcG+htSKKSP@H*LypJ9V8*iOkCHA0-P_96IAOPI|9VpV59)l8@r!2aU7jUyj5p-vz>^Th#bk_l z@+6;Y2+DZWt$Uj)k$~L6W>z9M54oXf!z=ojG@CC7JtZ+^uwE3a^sgB0#wk9HJEF3jy$glyJV>-3|%GyDiDPo zlcMrgp_R8cQEiZ7yJb2 zWkf?rXbA#&Ewe1YU{zSpTHN;rFnV*WVNSdTcRH*o#-7&u>L)Q#-%RiNBz)lXfZ(zs>~ASZj9duU74m+-~plwY9BuH$DtwR8S@+(O|}Y zI*ud(E)~!%x&0*UBs*W$M6~|owzH=YGB_B0FiTHD1x!;Q!o?`y{mH1yaLQ3cR$hO5 zvKh@`FqI&zQUbE$Z7>rd$4MkVWA#B0MNn)-vMeMuxyxc(pA7Cq7}7o4{Fg}HL1f|l zMKRx)F@mU3LVe5&Sd~umxJf!hoG|=018ULTy#@22Q3mIeZt`cAHG7AvMe9Z=3zP=_ z{ijKbk)dmz&#m+N?g83AH#;D`Ml3oli5w0k{!;CPfG6YqU6iKS@O+(`KiD(kFK$s# z>&yQcB79-$cwJqz14pGdFY6l;kyl*x4cHW!0mFK;D?d6i{00bBe~*=s+aH-mkRdW! zdbZ_^HG4C9J(PY)b=?0J2mf7Sl$1y%j%KA6{zq}yZ4E0RD9gj&89&! z6=8U(Zu8+#7*voC0ggV0Z>b{34wZ|)XYyqJR=S*EiZ`{eo}z4SFK<=f_vSBd&#rUT z*Dx|yB1;hCMSgE#S+{p+NSW^JFU5&|P{%XIzZQMeMr>?h^-}%=LL)E8C-ak=dH;z*Y<4cPrKJSO_V%vvpgY0tZoNI8 z*;R7DC!_sr6<|3HmZUDX?YK01=>5IOa1)Xflpo5jLlSvzK7KM9pyZY-b;;eq=l%h@>2|8Za%cY{XQRkF22zI(jrg zd0H-r3j585eKHcfLHD;(d$UK%KuT-De1DHX+c^{Ehknk-9KN&2&ce0tlMd~QWYeL`9|_icF97pV=&EsZ%=|&M00*icF}@VvpAWai{_%i z=^ZG&8r5)Rd64y-U@d{<=wx;Mu^7;q=NQh}7mE;&m++PzJnbpZ0c;DK765_ZVjSON z0cGU|ps@oKNN5fLV3$F5o0@KOoV}<|Kyes3-TrnwR%0p8Z|d!Q4Y_lB(}r@-$9!M$&?_wew8FuU>P^cFNZI+CCQ) z`W+#)x*vGgOm!e^y**2PVb2P?qt|7HqhtpHVVu;q5B4c4=Pxi}mRRpkKVc{E*p~NuVUplS z+td{retfldC0RTi9BOXqnoYvxd<{^sSc+qY0L*osK)egxlKb{HzXHQD+w;pR+6KDz|E{5Iq#;tBqt-7Ilh? 
zDOxjQ1_38yK@JEL_%9F@+MPL^HHv6=1l~Ef_p8m1jt=rV+-(Cmwvaz+W{0ag#Ic}r zsQ7U}4gd%Vdi~A98bN^{{3GkzlmUp!NFrtyW!INC*CxwdI+~WX$_K}Y({WSq4ndD1 zl*Z$&cI_5l(L_rU_)Z}v5t%+ApC#Oj-dQ(SP>Hh|uWJru4(DYy);cEIM*_nvB;BiN zX)hlKP|mbn#sXv_6L}1Y)W1?*ucJnxT}4F|T;lR3*Y{MTmx|W{)y#n_(2loL8CJ1xTD72+?((xu<{IOZ40azd zLo2A7zG5<2TJm|}s!MrgwF6uf`&nr@Lbh0JWcBIH??<5Tp7NL1u1d!1d)t^QIIzVB zOdsU~tRN*h%L~6KS$W5{>O}`4S-}0wco6`3ftdB=OJ)ON$}e+HdG$+&Na2=1F{Oq8>7;k_dMj2MLFJ|XLffuL(M9$2+(jkm(PqZQc zhYzJBnQ##S^}(Ruq7+mLgEDr2ykPot2{<}t_PujVl9#4=1Jul0s{!iPR$5H_%(j%I zy!kp3Duzxr*2?3t*QG?p#A=H(%ci-7Jm(JeGubVuUGhSdFfxIbOhdd+5@q?Zia;qY zc-HrcAeGCCg3{N{3wp)H6{X(eO4GbHAHMIG4Y6yO6_?I}W+E>C?ow~2dH3L!pnQ1} zfwBllaSqoQ>-j6jreQ*5W{tXY4lhJ%GK;r3&b6RU)7bjD{*4t=negcZXo~@i5QXBP zMpx&0`q-@rG_DN`OdWZZ1F$d9f&;3_p`aTd8g;cGf7XBAaV${Masb?*eVe>~C-ueE zp5AvRrWta8QV6^KW8yj~_E^2xJKo3rySASIax^NT`^U#BZ-$P3;-qBH#Y?brLlPjR@jbET0%pJd1utZaHn$$_I<{an(WYkZZ&<2 zuIYi`V}fPNcZc-LP5F?|Fz2@1*bd5KHVp$hxXi(z%aipPA-V5S&6d}bn|k)Lv=xMW z&ria}u0@y+V#l~#)=Fx1N<4T38SS!|eL$@%06=5Hj+Xf&?ifm-a`K}BAVc-~9N5!> zo*!su1iRCYW6-R%8*EN?1)x()4xJc#$hTuD&!Ob6dDkQw;s zB1c1}J;!;9TH<5w2^?GiEJo{%tqd_dw;>??s91C-;*L!9N!tXp3m{Bwzdy9J1s4s% z=sv23m_xKLhCJU&fC|=ZaiI2hNwhYUw1pe(b7kcyst4?b62TrSU(jkPg;eE~YVFb8 zD_3E+c2!~H-rp{Sm&+|!yAMOz=Gb#QQN*Zc2zGna?}8|<_w>S6rKJ0A;DRe6d$T{0 zeKiNWaWg8|%}1dS$StT_ry^E5%+4a7vRDT6E37rzsrke8E5%Y&YAbm+EMf#+bELJO;pKX1X3Llf`pvUv=5p{OHj~W!K81Sc< zSm46nCRP10jwUR1&-X4%?W=nJU2)oZUjelDv#o^E$nb*(t2Jm-ryFX9Fi8m#aA{k{!sW~8MR6&0nL6v>8h0rb*0 zqi_7)_>tz3=j4#ChrA~}#|voxYlYHD^@g1E#c1e__l)b3Gp0Z3oN4XG!}?*Z>&2-5 ztu;|;g_y=yg{@|nO@4Ca2UC{!*i+)S{OH&|ZM4_JQY*&My%Wj>iY8eWGwXPtzTn3& z|HZhfq2FUiR+eo$AW?C1Um26Cr@yI_LQ2q6pfGZUd(8TDl}9y zOyQ5Vrbvmmvs>GGWF}Bm; zca});P(rqs>voHU<57dHRr=NeI^za z$Kp&8D}Dlk1qVy8M=3x0;7@51`i;NmIAiQIHA~mssmeumVK4v0><2b~eY`yDcD3AO zEV6$NRhqoVDW?}mdW>k}oV11gt7yU>iF|+Jh-zbf5<2H5-*6t|n~t=?+Qz6SLZ+kR zp3r}5$hffZ&*U$V|NC-Wfuf!dq8gw{CN^;P{u0^%+Igd9~9nun= z8ax+mcwY!Ou8UaU$vjWGj~braTxPgDr+((LZxuBzy7ubmcndyp=K3X@H;<+WalDph 
ze_Lh@nU>?VeJN#BY@n?AtZP)53{BSUga;`Ow)4@-qv9_oBmoCOfpVXj_3EIEs0e3g ziZTC9r;JP6}gpDuRF!ZAnnCuM(sU0AMKW%=#xcJRX0<=Qp^ zLbqUgp>wVII?E8EZU#hvsQE-=+t>%wLG*9`M*`Gk)2x8OpvQT)C8R>XqW3A?a`NGO z{>vRcvwsdc+!l|9HjhM8vbhH%MQ+&Rdp9a8c*WlF+rH)G+^~wx+w(dWOpH~Qy;DTv zhC48Sh|_L0R*14vXSNH+9Sz*05!uJz_Ug4(JuBms9vT0Ketc>k_s0RsP9X2Dbxj-T9k%CVSyKZXHNC0)m4s@8f(r`}OnGJ)2T=zF zu@g8`Jff_hjdrV77}%2C3PM=UQp;vztM&RR?^HN4O)rbv_Po}ZuixsHIJP>Yp)?h+7P92Cc zwiBaJs}-h80giOUr&4~kq0wL7NOfjQ@FvSdw+qjn?7XuKE1zqEIF!@3Je#ceuRM;RxNZO5~N3#oM2Gr-8v%}5IO0( zgbo^OP4Kq#n;7mSaSjP^ow{13m8eSMw;AossaUX-#xq`_B2^GI7I-8Wf24ZG``ahL zrtXq@cnkB|B0GZKW@ee)rV?-_7HV)hZ^^zY?4pa!>?Ufe4LJ#ahx1f?J6a4^jlxHL z52+!em2q)jA)!9tY58j&Un$vUm)&9fXOU6WQrdt4tjWOW&T>|agpc(&Or#wD^1@Ja z*7$GMoASLS(>=CabHlkK+z;^0!vodNpVh_>ea=V1PPU-N{b7=V|-NYjI z7?MjOE_l-FzBBT3RdIX<3BtO|-5+*G%ZwQuVp)S5S%GuZuBYHCM`mjO(Eaqzl%>T9 z?DMX}wH>>2lJ*1c-no~0o4naGJsj_}R*bD|rjsqjM^Rv1mP%}X^5^iptEw9vr@ZSn zEu%=6HJ@C7Ct27^Ha?lS-TJ9|H{_49dx)>uSe54FAGBkA4%@f?GQKy|pbb^RUUG!7 zH9u-6En)OH){^Falq1GxDM$_!uAlx$SWwEzS&ux#;)QMedaw9*DL9`iDt1?X9!(fD7L{H7% zvV8cNa$Cg2FKW4tr}5rHByF|@6Ue;OAB900ENhf;3%9Ki8E?V`{m6uZ@8D{=?Iz0-`*KiWMu1S9NELW3j_!$yDTYPn~Zr~@6 zM;KQ==N%+?5k!1n53ZAI(a(MD7qpmKQeFB)I~<1y$L^Uu8OY;1G*AyWTb&+oRkWNl z0yAHBHAtY0>Mbvy;GB~*3$J}B(yxk0gL^Wu{m;?#0_QHFTZG`QiGZdqAQUPMJk=Xr zXC>A7VXepDGmgAe-yy$hQ0-*3PzFR@bHUju9xVZ(Hn+4gbR}v>B$M<4nRhw`b-GLU zDVn}<6WtnXT-=(dy@sR)!KYiE9qf|iDjcMYd>geWYx)=S!5JhHPdps>-BUYUlO%Oq z1)4GKm#hqhDI5r9C3QpBH{Fl9@@S8=F2DMPO>`u(k*S>>de<$(8R_-Pa4);_;$=njQW&B4EhE9n4Q^GxwHC_! 
zI{{~_Pi`Ax61}Z%T^CS;b5)>0NXzB#fQe8};;*vobIz4OQZg_J>y1CH#;S-15g2ZTgs!a}JHER4V(g1%$5sU|`Wp45akqacBJy*F-ek61i zdbkZZTJ60c%Fqh~`rMn_F7ZTH)2Bau{qtRPMNgZj7L_&WIF81wsjdw{_0>S;wf~kY4h{=#ewJC)S)QKh*0h{V~D{@>nE~3W5borDCcC z7W+3HeN|4TtAv-U-Df2&u!VlF5>@#(p8F!&Q1<4r_d}U4rM3y)ZAfEVQ=xSvxh`=m zUpzT4wF%P`oF&_|?Y>``MzrBC8hN=+ohS|KouGkBd4u72;Y^NiSl{d}a(brIQVr&$vRyc%;f-^fhFM^H++vp69EKWBp0&t@GA#+cLzB z>(={$vgnY;ffMqlu&X-o+lZ$ElZ3YE`PviVJRLz^t(l>3O1qg2CqbsW(Qekacxnb< z;NGO;K#&9i%q2BHlH*@#v%04jD%W|LF*uGpb390Qz8d`m4C~3O377ZSYLa{k{UUr4wl-^y@mdkcY~Ri*2J(XwxM{ zRUkVzZc|$Gq9vNjo!lVLp2Cb9;cFn9*Oi^fwF@oCd*rGp*5;f3x2yb3kcl(j=?k-s1yqNcxz>U}_im|Jo zxh6*MX2`T#YTlES*77+ASNJj`?=MV@UkY6jiEEl$Uwy;%`1Yg{aCS>-TeCZQC%^ti zFES*P9roN?W&mOm$+%!f;U{@oHgwS1vu0z`yH;$k_4UYYHnifG*tX<0{qbq+Y7k*> zH3D9GpN``4M=dnbyls0{pBp=osP(zv?Y~3Z$H4m0Qj`HRDmL7)J@+wj0Z`UNi2CHb zjn4f~eDfisuh@|dPu?fN;19=&0gS8dGh5nV`Q+c?FPZV;kj|5_B5;X}qv*PNn9q1R z+@hYhmFv)j-f@T;+xN?lQO+>uedBI=gUL$Ll2u+JVI|SXVVec)OzQ%rJs?L--i9?? zv3p7(iz)Yo4)i9+__GzT*RRYY={X?DSC_|BU_O zt72?We@M4ta5?q^E6%L0;ri60QiHBHW1?gOEosc>mDAya^?cO)n?k>NtQEJeOFWE) zEe+XZ;mtmQL)sNHY6g8TJp0|m6GdHN`V_g$Gx4)6c_m2CI8g>e78GO2~GX~<+fWS9IsAyEP>QKEa z`K~*gk#~TI9rMI+7)FdL{K$dFIK1G-hQeA7`UrmAHk(<;yro_i()c~u5WhLPj-Kbj zrE*Vk0;~XO+C=a6jAN_lP#0P;mkr{@enx}^%-t8~6v~Yh_9u{<&MkxA&F-~Xf<_Hw zf57lq0Ug}bR>3ni)P7)SlnCACNyn(pk&mT8&oT>y>2hGq(zCS>$6sQ<3mQ zQk(2{DJ>dM$0D77m`}aWG>W3i<9gUy7dwz2e+37lb-Cbn{1xB*&=iF-k1Gi#ReKVj zOTyX7d}m|?h;X;%(Qf2vlS8pPUDCv#pDvL5px}C!hHRG*d4_eABnUuW}HjMY8v5!>3p?cl% z93(f)(k*a7gkXGezV{LX;~)o7mQ%PWVrf7rt`6pe@2gNc_RGCiCU;z1j3%_NK<4GtUP35Yp$2AatEyO!Ll9;~qC?c!zR_3?$&E z`frLRQ^+=dP3HLUW+J)s;%H@&F_AkegoRmGR2@~X`Vq-m8Lhzjez({eXy0KW(S*=K zpI5WrwzKU0{W0dtHD4C`y`@_bH!aoo&oU2QX}(R5p{Uak`WiQy8mweg7!X!#8vBfr#{PcuQT~=b)+t<8q{!wa{GnEt#N*5aSA;?>b{Nt zNHp5iOGIVG!ga(Iu;XYgsRY$7I4{Lr4!od0;Fu#mi=%17V273q?wd#7qCcz z%pzL0NEo~&Di&+?!Mx&|p2^VB?cWr$u1eXNMt!Lu)Ioct`d79qTd=nfYcdJWmpf5s zkcfgm`7>bL`4tR0!Yr*3tTpKUl=O>~7?!v0f+0i1MF*s`y#w*b&u0g@p7Twl?-X?F z9A1a?pdfY)lipQ2!H)v{HrWT~drKaJ^50(uJIOo7nTV!Ur;u!|4 
zL${w_PoUyPEEzzLI{SJ!ut;}gL8BSWx=sOmJR#C+w(6&UVx$Rh12%<)kX^rBWZ30* zMHsNOY(8AKGXCy^BQrnP-9kYNJ#tw5&h&UHGrDkloF9q!VxuK2$#IU)h^iVje}O%2 zij8fuZyM)4wuyy4SGyGYHS7qJn(t0i65C}+?=ixQiv=O3c)nssMI^v?^OP}g{sIH6OBSbG%Xx#6Yy6-UedT+#*Wx8i6zyLAccV@R z$my%$vK>MyVeP}m2+j&C6I8D{Pyg7(@$#5Js8bAszhkAZ%6O`u924anA^OF}sQn@$ zf(889)gmfkhMSje@)z_hP@7y2E#jAi9H18H97$r0sGilYq-^ZnnYj9Q4h7qP=O$o) zjg#W-|NSk$sk^D;C~{lLo2W?6rTZ7xY-3C{t{m#K?M2`b)!v*HTe5BT3g@zhyYjf} zd@N4PM^ARFy4|yHdHly1XF?ALe?Hj*})odN^2f0t+CgC zQkA!f@Og-YM@^Z=YdYGPXT5zOn(rY+g7p`vD~ldp9yLH&Bou8}iE>(kNBu%kURlZ6 z5_>eAepweP#FS){$J+Cf*+c99yKckhZaR7uz&8&e#d|$z>mU)V;xz1=?muv~5L?N+ z24fX4J$=jT2+{RU`QJ} zdRUeeSk8NN4|UFS?ooD=o>9j-FCivx?vK|fm|ZP&1}FO<(%jSsWTo(^j2<|PlAadu zv#b$(C`_3q@->D#k!ui!yY=M`!6l}fy?rSe$DNcQwZYw`?uoS;nZ$EyK9eNFeXj`1 zG2~Ag7lFP^DpQb6z>I@tbAjcdzLWf_&;9&`(KdcR5z#@;Iar6skal3(pSC4Jcq+ar>5GG7+CL}N76JM~dR27p+IJxjf9Sw%IHnRJj61lmOoqi}9cIS^s+>Z`|0J>}vMJA8Zh-4X|il zbE?}vN2ElTrmg)L9U{$lG3XP|7|~ur@1iA*=eBdi_~d#SdkGN_j!I7)+dcSqje1-u zbOe+X(`j$2828=S8(<}8hH@2Vm*c#?Ia-M<>?ZP-6g&@PrPn<31Xlxdp}u${?&a~8 z@5e(yvE*p10c~D}^`m_pnTe*Wo*;i=WmV1w&jLFRRWw`mG#GH1% zk`32ykS|xtcU7&YfXHyW@DX%LZo*x0WE7bBO>Al{)kEt#f<0opPgZPyS(4n(UN$18 zNmMOL1=|{{j@%*%3e4oU{?NBn$i;7^m-upD6f>%h7>`iymfmik!z;(fMhq8=bkDaFPk!5^Nx*A%JVSQVWC%P2GnxsCPDXlhsH}`yNQ09}X=EP~ z*VZ9wWJgT>$JR0_vI&e~WBH2vWz*;V9xNl37(x2GFhMi7n<>>VtPf3Js#Q8MRt-up zDe@}i#gQuc)xj^m=>PjAs#hsc)-1eGJ|d)r2S*0()~MkN?=PpfYn!dkTSq!h3YeqU zK5DGd=#oF+(!Xpay|F=T2u1&-zSH<+!H<6KrT90kA9B!p8@^z&)kA}XN$?AaPN!I&}-&Oeoj&2-?Io^Og1kX zNllGI%E}1bCQ#48H{Y{_>trHH|4HAYfxo^Fq}@&{%C6!el7Z6TU+vQslG4(0sNGsg zT+u@Ry09yvU`NGaNf>ZjPny4sqwLpnJ)b=mPbQ9+T(PCyediM}?7zb&o)z45wV0_g z(5?a?sYi$aVOKTkh%rhA8hK@<&8l?qn_?YEKe!mxQNaaHLz!o!?Vs>u;Q5<`36{eH znLAC%2wi`NVD5yP{^n5(`(g_BOix$%GuTC%@1qa3t5o=c?S=4*pec)aj3#Z5O?ZH_ zb)Wb;a%Q7jjZ6|(EB8Pv4_ul)Nr*S+H!33+_h;I{slAF`&=EbH5=fw0G)dt zQdt&{?r%B#P~(2=>}N-}Br5oZ^IFCjWidm-NoVORdPQpk^`?ClX28amJGtXsyp|Kw z|6OAt1@~1YlWgl2&p=O5_UQEHETMI@tg(x9?$MyI#o0Op>dUV>gZWizIM`Y3{VQam 
zPj3dZ$$`wj^}hU;K@i=93*V)`K{Q50RAy|N3d`A#uBGk@LO=EIq@0&5kH!4#%2&KX zd3mV9<5LX5)wS>Pqe9Jn(Bs@AmXy%6YI!Fr3L-=3xC#vX!~KVmBpGkX4c&r&->3Z% z7$={!chKfgs0ypG+aF$bKsaq-Atbjglcjl_^$LR|#MhT)BXJ6xY7x(U+~FWjG8^~K zSwX8w{Y_UVBQo8OOGwWw(71c4-+_OSwG4BN?Pqr?1540!WEi!SA?~T;Wh943eTP$G zJS1W9`cI)RQCGmZeeq2TPw0Qv%hAx5ZZ7m~lo7k*n?}jL!@U$9Ye0&4%9IMeyY5Rk zd#-ybHjq+({aAc`a;^o8s1o%zDl2`_CbdtRN!i8QM6570^5nPnIh| z#9O+=Z>0ZbQZK@gqmRlxKs6GX;e8c*`_ZN@*_-~Kt1-EVnMgDl={LPVf!sXKvQ(!PwYnfK#60Hz~RptA%# zNK0`K!NXD-MVqp?W<#T6*EnuZM~qk(=ldUgmH1K?+2e9W^{;@@%VS^ScR=_eb)WOO zzqH21d+^&eQ6wY(%#Y|>2jfp(d-5-PPd7OAzyCL) z@JXcqrZ@zqSjzb!u=RQZl9lcHOHYw*^*#@>-FRVt zva)iL#4z`1h|h!r#6fjzo#awL9Q(r9H%78hX-MpX(DnGl3ddSKxg8`?{FA|jpXG0L zY`y>cm73W}VKvQuER+Cx;nWgxz}?RhY}-JwU1`JNu28FXOak>d?p_B)Xz#T^Q%+3n zrY_2D%vB;RSFRkMTT@H$(fC8lyGna#OOyeglef4c*_9pLz^pN7u*T6J)S~R8E+e? zdbkhhAK0!<6yuycIgLIk`~D}E%QPJRl#|vI(;)yjqY4wv3(zMh6NtP-zjg8lwkwnt#3>$0sR)8#UD+Lw{}HF_!w z$VoqlwqhpJn$`M|dbMXTc$sax`u*z{U-5LYf0>J_t&rqkrOY4UaXjQ{)WmG){ zoXu?Pi(^Urre44b44#i3_z;CK-{?}1L<3nEAfmwl8B4%vR1^_Qtple&a)SF`Okuy; zj3`qOYAWxEfFM9}YUBA*FcJ1k2CC)|-p_ZH9)Q+K7GF7gL;Ik)Qo0BkL0DDyF@yK> zyQ|!0FdSAtPslQE~^5{UU zWMh1fr7}NacrEJv7k7+2u2(irSHGh@9LzvaL{;GJ!qTKTp3G=CC3t%y&($;rf%*WB z_iqwKS*$kXf~i3EYTJxF%#g3eE#&&Tq)tfa=gfn*FRyOU@qMaD==&(+kFTtGl6Rwy z1?F?+;R^lS+pLbq9pCPKcvrB(#1GZfzL#T@Wo5c5k-ta|-fss4 zI;4zu?Z0ssg)BzU0k)C;U_BJ=!lO5SuJ8Rv^Rsraw$QvIxywPMGQxZ27V3rbT2O%b zu-X{(8ORPR(%zg(6*TnjIJ}l$p|p|b{Ei_fi$iTs-#~hbcD#feyBH_2=u1(JnA(k8 zDvIeHFBBKFQ&C4GbUM%De*0)dpOj^Q@_~h!MBt!+lZ~XqF1R6|1BA`x3##lggL-Uw zv5VBnWC^>f_PBrh!h^{S<@YsusyANrk8msZ2qdVeT@Y<4isM_0qQQn5rL5#w*l_;A z5+kIjIv-Mb`H&({fG%oa$ku+t?cj)lW0p$$5av=uT&=_JbYNBypd^$jskf#+%yiY2 z{tJuwTyAat7s$wJU>=`grT(OMnoNyFMEt7!3C}y&WJt_op10mH6p23RYf%QT=R3zH z$J+N@C*!gXk1nv*LTI7$)V4R>!Gnci6!$RWw!xLZai*X_Y`VSj!*qn>&navqNdq}P zCW6$Pzqeukem!kUJ=x!unMeVoL)FNG+XVX`@8m#CocIw|7b$Y*nZdc8!Az zZmrlR-My!w?Vt$UT62Zka(x=iiM{xWT2MU8>7$)?-1vf^u+ zbn`eB0Z%dT>sIf9i^#0mF=^Tzp(zS;S1-f6;FEo5RR5?BR~qh;H~ab9iO7uaP6}~Q 
z%z*c3%T5n6$Wm~Or7h#PjB{eMwv`No%b`^iNkZd=|K&DtgCz4CM7mABQxl28;fSv23T6r^sFb z$wws0*KsNy_w$WpkLH5MF#nz?3q>^S`-s$m<(T+Vg77wwP=QapmORdVnLjd zcFF(Y=_|vcinhN86$=3s0cmLk>5@i4x;sU>LAqO|Ta@k)5Rh(ALAp~Kq+1v|-!=E% z|NGTP=FFUP_S$RxYG=sY{3|k}Z0WgLcz(0UorLmx=9ap2Lkv_^ExrnbH~m%p2L&hk z@9);tzk#h)M`y!kcNksF$a<+tJ)xJ_S|=RlW3MIAm39Mv;fE83nwTI2YWJEjCvizx zv*IBFhU({^GF5tQMz?k{B~aWf2X0te`nBbdS_SvygrVynuFKrIGAr;s6KE5E)Jl26 zd=R)@a|lYWYH~4*d)QL=#bg)qeRqZ;TYt&%v%U+@cb$@+l$uOe?*BB?_!B5hd!68W znd%ZgB2Md3BN+|FTD3sWwHKal@j0tY0KEkNK45Zr##>k6+F_L(oubs?>?ke#_m_=& z!AkjAuRnfyw$Nfqy(x!NCRr+xKH*?MyDa};-sE!yI#YSq1N%m`D9K0OW;rwacv#VF zt|R5~Lg#RQH$BWAh|5Mf)-%(1ueFA(wr)|yxn&vodHu~L->L*r4zZ_BNcNSw#oP6$ z&r#C~E!_YN@7Ll1u7^xoWhG_gVLJ#!NprmTTZ*aDHl$kG8#7VdJ?DMXkj=j(X5wT5 zeuL9B1j}BuCpmvY^TafB*Q|0s;5Qg%foP!1o=rR(m^_J1Dw|inXL&2e@(0U zn#5zJ@uJ>6rbk4zB4g~Uox_w^D3+S|Q~PDM#!W-k3pp{0v+zBmf8>_x1+ge9S+Fw) z?CJt(lu->$L)3e5jNdYI{SDQGS>J^?_jgQpZV4lgwH|fzEs%0FW*1)9eE+ER(Cbcl z74PfukA`g1LinJBFkO7frAH-E_szcR4ZLa9zznCJAd{a<-ZHV91b%75D$%`lgx@M% z=K6$@I4d#e2u;;yt=7B~*6wJMjnfYUcL3N~e-2t<*_HCM*^#h@@YRWP#kRm}m#u8k zQ-eGcjA@SBd@kIj&k~g$arf79Hw_P)STfXEo!Obytv&?J!MxXn1&*^@k+f`rqj@Yp zZ2GW=3!eWi$&%)AI21rB4?rKPdvge#Lj0S98OAHchZ}YMOSboPGP(*OTD#2!QKji>qOd$13U7X$(2 z35ud`2o=pZ82J^z<2$n}`Y_fZne&Ee-55i1>6d}>M$O0Zg;ntQIdasjq4;OcY5TSE^V9} zS)z-Mz;Rz)!$qwNuAH-W)ASQ11`)Ge#`n_6|JnwwP25N$deg9`exKdx&~n*cTg!xk zTl5j3`#_JMczu&xq4&%@QtHH{AlO>D3Zy3ttjsTn3Di0qu0Mn+eGmyKysBbZa=_7++&f}c|`Q`KP^=(y_am)0zV{P6eSVt(T=tgtaEVt2cXHuvNCPPL2Y zdSSxn9j~WLmj?xoT%-ccpA$4+Q$$_9`zJ7q7N?G$l3c~S-aM>trS&}S(`o4u%Ot36 zJoY|v1{b}`W4`rpur(k3VMz9xv>dLZsObINr_s;G-p(YR#8uul&4a;T_=_oE%!_>f z+^3rN3!;*F#Z=MfQB=pNh5xqeZtVrq$Y8`T-Q5qWY)*3^*Lkj>cKv8_wff#(2*>Zz z2-}NYV|yYe>amB4dHY`$14`SR^)3CFdhLTpLFd6@Bescq<8l@ubEhZzor28Hih*sVu(76^zwVE5+j87ET_U_K{maH?Glitux zTY`vqe|oO-aEZ^C1l@u`EKcb)Q!QqUx;IFNcU{H!KHas?Iqt>jdTpSTggJdQSy=tc zLHogPfzA9)c2$GE_vjYmZPMNFE=V$WsJJ^G-aEN+3gBMVkMLAa*=&=g*=G3?LOQ7X z*g;EfM`$&i&0`iQ1fvpmo%$+9Sh=j!yG*zCy!ne(o0lEW@&kbH;TB4YE=^?(P_Y)! 
z(lD%Fy*_gbV-UL}pZc z)8lB`_21vaW@n^YSwvD?@!0HF_i%|K&962>4+%XZxTo|ZRD^O;x@o_qGEVF293@=- zLhQ_9V@o;H;_mUkZ)sn6Ql(s8txptfHHX)Ch4+zhpp9uptD!2`^-s%rylTLb(rresqdkQ#Vt9ka1!uZ%uEo=O27H7de2hyd+c2e1jU=>Y8>i_9q_#u8pkQXL*_d6%) z+Ip9`8d+U#prM}2U;j3Boa;F>QTNEin>v^Xx_=wmW-zZ%!p7P)^IllL!gS~4{MP;# z0+o^elDFp*Vs>S3t#&V3Wl#B^xWqac{h@FDB;Ep{eU(2=d){WA}zJ($Ba49&m}J z1r{-pO(nY6OGgKCAW?z>Oqw~Ibg;VbCT)jX=Z~Ml4C`mPn66aE%~D(JuepKVqkEJ( z{DBS!wmaarQ}Cw2!@Bo(>xQ!Y*hG3HyV)Ot~Ti8Nfeo7vORejD*MLsC$W6ZiKU9d%oZ2PwC7@XVqVxmk^x|E z-ScflnL3_Rui2veNlZuu$EL$B^k}G}=RrLnpu@8*jQ23+wV2fx6Bhn=qp#NWP()hn zjcAaEU|7qPtjS1j+b@qVw%V+0^^$%X^*cP5DMl_QcKgO89^8>+uyH)Q#3-&)*O4 zYjYYRFzhJp7y$Xn?m)r8dZvFu%{KcfXos2#h9gGYQE7u5`K1LXV}H3Cl;tKNH*S%R zHS0%WQ!xK0JNxHavL7^%)!deC8vL}CWoz7RabVuT;>%4Bk|3*f8x1JxeIxqBRwv%$ z#EpUGw=W?GvCiF|&U(E1$qesk)CxJcFA@^t4Dk~zO7yAQ zWfw=5RRn4ZEp)^1sS3VJ)@wl}THVXlN%%cm`zn49<>>;2wqm#hw>zSA+#({NkJ-)0 zG~BPc)5zXDE!UnGx3%{ip2K3dw37~{0La4%ym$qhBh--A^c)={uE?GoFqUQ5p3Uo$v#%OO zn;RjSV$pfwgIJcgX`%w>m3`cz@$TW?XgJ!bRX!2v=jGVZ6B~iw4t$KzflD_;z}yjC zs`qh-T0G2(#AT+NX~gsw#|=j0bu41(rt#4MDP4l6Fglu+J0F4wg=1U`1OGZSMw@L@ zOe2GW%{;61R_#W6Z402dKEoHk6yzQ%#q6lWss&frOFDPZR%NxAq)2k6p6om#n932OmW2e8;6 z;fRFd_IuC)8U(A)CQ+w9K^nKU{s29T?)#lak-OJJvUA0)nC*VC>v58hvGr90BX9Sm zP)S0`A~l+A8fo2>K6~BX>aF-V{i`Cu8RfBtHDEyiaO%9G3Jg*Dl-}kAVixH7RnbhK zv5RKh0=#N%9U#KPbl1wRGI0i(%ctTBR-29@eXn~11hn^)GsV-Rb%C$XOvgtR$~ksrxRXNUl7>k#{t~;#xT?`;?<2Hq_{N60f>-j9;>-tP#~PFG`uT zA3#t1=VTIK8A=2GA?}ex8{vrPyS_=BdI>kWgNjlvr9N;JIO{}mu}CDBAg7NOQj(y> z9;s$D+!~TK8XZ5^m$6+k=st~eHzc01TST*WSX|yAk|KQLIAo)QvzPM5ZD%d~!Zr1z z>uOV1hs7!6&a^EJTUGEZ?u{IH4+0yk*CgvaY=j*~XN7TSW#JF-Zjfjq&pu?8ErCFC zHDbUA8W|P$Pp9GrP>_-u?u>4X3LK?&#tGAKfeQ}no@~>)#l|XsjGBzBtu{pj!Ph)X z_4_V?N%7Pv&%F0CVlN;2)WG(pEI;#oL)>?#H718N#!Eh55tRBjFC2IdU3Rz5m>zv| z;Kq3!%g~)#Kjl$XvVTG(eHCXQITLYta7uKP>d?P4fY|GOqcT>$W7rYAWwLqE${@wR z6}k6zpcbczxY}*0l`_`49_=?sn}fDzd`t69#8_>jI)8d2^MHQjS;|$)*)Gv#65Koz zo~RXMqRxuNn|0kQvOm1J+nbV3&5Nq$=XYsp?kZ@l`1E0A3WN{oURmtJOlX 
zo}4Gkl#F*(W_ja(Hmxw-qn{wt|4WKg`KM>MU9lVyvY5Pfx^+`p4ShaF{NqMSV^4R% zOo?Dbk}TlaG$zWyg18hP_5mJg7|jvc}lq;uiJ|(F2meWWzKPIJ1-(*!)q@^`kJJ!ajy!w=x~KJx`o@_xuCy+vqS7^c~aSJ zwOf;0N^e2Rg^eY%@|h^^p(MK1=n!xJacSY+d;u@^o5)vw@6S`+v2{lTE!I|3&JsWB zRy^hH;JCtqUo7RkBR@yx9N$dE<;82$_bdLT&DGqH+72BV2lU|4`^d!D@`5pbtDkl* z|55iZX05~cugLsGO^{Y6`P}C9mXuw}er2JSFh1rd@>od&*)cZnOFJA=dmEau$9fa8 zPpgPpVZJC*k#5k5&WR!&nV3;JTf|(O0m;D=oeR|i`@yY(^@KOYj*!=j!uorHDMyk6GSMczh4Qk-EQwW zG*I_DJtJ_^7tKI}Jvy&uUJ#0|xPJSj-H!qsn~4ZpHEty_--z z{YG#UgSHx(0yFq*Z0(H8u_EOKX@C8BosFQ9^=#rNZA-2@&*4*!2KV3u#qu)6T!1!<=!-jm#xa%UVD<@g>5=mIKgSeDM&5xVoa#% zme5T}^f>p$8|>?!6l1h07wB$%%t=|4Ou7I115JxDpK*BL)N^$$ZG`}p2WSq;7hTpy zhnn$CwX28C8nqjmCet>`PP2)#N@N2&Xpgb8uif?&q0mW{?+MkmE2DG`wyGCrC|OXq zQan^TolT<&`jX1p_R6|_(%V)ASQ!n&b)MQ=l=?eSZQmx8UN_xXkJLC; zJiPx@ytmN!lpm4S+>6_wfAdz&?u1i8p}mh6_R}`|Ne0F1%JkY>3o25n8xfL)EY&oP zlJQ*!X=R}G@Tqsr-K0{hS;=IYIozYaU{Y5KJve?uY11dkDCdw}A8+IOTY||29w5RY zCA#1^V6$kIC{BN?i@bx`wUS#KQ}sm(`@y@d!;xAdBaS_1LDQ|r>h(H#o;KnOM~4RU z3i3DLM@UU08I5REy&4meF?A2B7wX6)jmVS~BG&eTwW|jfFO@VFHl7t-eo?OK)#V?} z2Z4$VLI{t$Y!<30n|wQ_Yn&fzTAj10%z69GD@Ug9>c7s0>r1vCv}?_e+&RN?JfL$A z^6DtA;;psSC6h*jE6T22s8Z9`2$1miP#Y|@;F3OMT5v*t%}GM&ia~pz0tNdt`_A@A z6@A$*x`cxpKRgy61<&izk<+-{@5s5%dOLN!Rvay62X)s*FRQI3zJ(H16=J&9@u{5$ zUrq+x#10{d|G;a+S2gW68SZKG*8C~1P++gNntAj_MyeQ14QbO}=m`%#Ysz4*|DBs3 zKb{nDEnc|*XknaC8oY}pCg;c}skZoitGKXfe`sC}Pgo8(V z%6VOkwI<=0T^B8VOyuht@}9sv?KA)@p2#m=_#~3E#Z0-re_gjkX+tNi6~(poO|h18 zFwyxUrPK57V^VwUgU1NzpwQONse^skAr6@8vK7tTe9u+JNcqRs?5C33Vkd4Wy%axC zIeE;r=X)2v{kwxhsn1t=!&GA(c2pQ#6P;Y+e;?BpH?FRdYL~|J-cwC7s@rxrz#1^Q zfskL_>ly2QOu+@M!ujsMtW=!rrKU8wTzB9PWD{EpXVT5ZD^%T@Vg*Qi?BKK0DxB`Q zSgV+yDAxo9Hi)I(HsrI&vDoUN0?%LxArIo~qPQw0TlCk2uvRoF1#$<=^mORv#`@WOdP^aj)?8XiUY^Uxn4fQ$`Nl9FY zreYw3aIXtDKPXxFe5TA%(MO{*t5Ju3<@&|!?vnpVv)%;!j2EylTJCvB-aPYTaXbAe zpj$YsddCh4HyE4Hao>{=F`SX<9yzPwO~~zf!0kz)GdRKWquhTlsLMq<6?-p3nm zXVgT8vnH(q)Hrzllen$V>c@VuNA1Bo!%{Se_|e_qGTF6RTdOenA^n7NXwZ0Vr#GAy!3RvcFbn7cI~L|GZah~nJhyAEfr;#9R^Q4 zgim~)`D)w@Zm&%vQyHMkAE 
zyzBqfVC=e=bd`3w$;_|mp}^#w#>iB*qnw$iPi;)C#y0s8St+&`3VC58G|T!DIH<2i zutD~T**|nI&y{D$ED|UdaXq1YzaI>_g$+ln&85*LyW!Xq2@k#~Ch4jc4|M8@v+`{% z@r%<~m)`l$+R=t0@7drrrJ6j{^%FyGl`X${E-h=dHwHQcz{&dRt-ex26I8z;{u7$6 zj~w4S+cH;lq!*r&DOL`cpdsSoD|>x$6^3RxS43tUtks^-PEiBCyxDi=G5*qcPDKi0 z1twgXuOMw9sFv${lh#qV8JY@PX~(imZ3m5yB&tXx)4Lsv(>Z-leDf-kZZ)P?2wX)= z837ho)u!_tzeld~uRI3Cp0ZCEX6$YcC4d0Hypj-gobu$#tn|e|J2K7J;vA$8%S7=2 zhn?BV$?CcK6V5tD+o!OEWqLGD%PSsK`wKik{BYoC+}>5R7Ci1K6-ZAI`10`PE$SNT z2r$oRFr@rl_f1jS4;VO`qsnw1Y+v2NlOt{Qpv1A*X%KADZgea2dMD05qqLNwyxL=g^?}% z0eZKb+I_%3fNg?-K4+!_CVM2FXo=h<`F}^Q6HpE{rV%_*!9fx#cYd+3`6Tc|HWk6_ z>AW%HS*LeuZ&vNmaM>F9Fg3m2Y$IIiS>+LBCOvPFm-r_LIv6qwn_2v6xL-+;S=DE| zM->w`1F+L!@^dp+t|6&XugSgKmc_c6Qw!;M{8@-Up{jC7eZ`d5 zu{Co37TeWxKEtZgDtbQ81M3l+mH1tqzGoBNv>)oxnB{mefOqHIl}%V--|qa_Xg^$e ztgV8tdBL6c*>x|T>KT!{_cb3ya6<5g=h>f=Yt$773!Hrg$>c1g2=FS0w>+B|rX2^3 z!t$gEIiN;B3&&3m?k73f1owDNXZ*S=yMdD*Z<6AKXTv1d>xyc9QwfFcpsHo?np`E=M%a<{ryU>7A5 z!wA(jFccF9CIwGwh=ihuW%flE*jSBA_CGalR39))PZ?yygt(pEsu?R5n)NaKIT6n} zF}7i|$+P=)&z9Fy5upO*a^Ue}!d?cOl*6!^;GH&x#>wOOtLly4i}h1BuBUUporG8GJq`7$%|0FNTY@k?Ba6Q?d)dNR#z-ZBNz)AK4)=@e%%V^I%p+7 zPXH)l(~k&1qTM{|;IZr6F7tPgd_z+GWX^0Dx=s#q7kZBoKHAZGX0|F_wVrIrYB6ym z)f@jfyr_tSl*(&nJGh&$_%w(q?si`&8s$>0ac_l7)(X6Mv6gqfUCy%%N5n&i1}%J} z`C4Y*HRkK^sK4>J-5lD}eLnN1_g++t(MhVEwOTcDZb|KDz==X)%kWv8u!{h);aVur zHV_CiuC@($<-$tLb{oOJNs?s(<=~;5!3GmTQUiQWgna6XjoXp2K+54nUaHt)d$=zY zW8z;TEN;3S8lRq?btY3po72`zj~iGJAh+)BL8A@tM7`D#mXh$xFj&6TnAlP_0;ZEL za{_GovMBbb*1b(;L$0gQfR_7c)IUB=R?cBy>EIL$*2OkHMQz*Adg~H) z&rL4P7p0F=Hy6jquw*?)K6F8tMyoMjk^9W+CLF|DrwT_bsue!_D~Y@yE|dXLO$5IS zvFF9^p7p(0q#%6`v zXoA-3HcK!%qC7)g3~)RY4(%#uvCXC%u4g?1S74JS*YE8yq#C{zpG(s}I4g1YCBlcB zDoPwnKMM!a^z^Z~1W2 zh(%C;psx&%kk267t4+1k#|hJ9a7#Ut$RWM4C_0q!n1 zmF+}N4S)RxBU%0je~_6xzs?Hxf9FrCnAQ!15Y_0pZ4_C9WBPxxEFLEY?F)AsO%NmO zVQ@9ASzV;?`oS>}?@Y)DJA3d1)z9Jf?R# zC+r&Y2Gjx0uG%+U*cH(Agg@Zfmu>z{&Zi5?DJ!(GRe+s9V}iK<-NGp1WMJ|K!wh+b 
z55ZKq0LsIUy9M~EwKDr)4qBWJR5yzecJ{>TAl<+nV^Ms`da8Nkbvtj_1sGpG5WU?79c-GpYYd#a*AYJfBZgoE+HgWw(&4-hPiZjBZU{$Q zXeY*1w}A;NGYp-X9`0!+!DHf6#!y*)!uZAf0#q=8j(BhVhjPEWzu2h|_v37pB9byd zDHQio+61~IqZAyH23T17gb%xEpzK)vRgzle?E#4rD6Fp+g||)ZP|dzrDdta;xrum! zkXW%sUjGEiO2)^7yQs7wq7_3`bSqXwk8mDBGaFJ7EFpcypqR2-KBQe%_>|(`6HYXA zgzxI+cGY2yg}?s#K#ND*N0FnBk|U|rHIg*UTWNam1PeC*q_%}+LGwEvF#oqA=?`Nc zp;gATgh`iKGa91%+4vUgma4YiO~&^S%!V~90y}5Jf4mOnqwcJ0h%&DAY zJ~XH&r*QS{L*vl%DXT`RFoHJ^XlQ06w6)^hTAUmXz^RXbQ>_zukCzHj`4s-RzgoJ% zWv%+LXuxKo){OF+={)t)A@zWN2 zg9qCzN~}2o9oiwDwGLO2hH;$KKE_Z&tls@qMCY=Oew$M8AKS4_`9>9ysnRt>cZU7R z(TQ(6#7+_9+^>n!xx0qiIzJ&2e4%opHSw1L`_&y8{4_A#{o0r{y;pCR%pC-CNd4&G zR(u#AHp}%#8Batr-Jc&{M2aRp6k>y~x*Dq{xFRb35ai9>F7THiUKq26O*C=FY49dG zA`TbaF|g2iR{7Y3$nWaox`f-*qC~nc(by5$*8*pX1NLg>2cf_bNI&>(8~SwEK6qYp z=lBCJ{11fjYi&suuFKWB&7X(ebiY|j<@@5dMM^-RW+jin1YjVe>dESTF73Y`m(AWc z)o_CcU?L5HDEq~|_n7N4?u6LnBaLBaoZC=hB0%h5sP9JrmSi14IBRD>9R!?~G1cpw zP|y+-9e@apx3=Ub+@xs8pE?;JvKPyJ0B(1W>-zra{lut0199q<5QI_%Y>W*BfgcIKo2*S@EmsnhT5a9H7KSdN8)e}rSlq}U4U7ic|mnjyj>jW?db~6V*C7_h! 
z@wez9!#69thA@qTY=^0KRA2+ar~A|f5a2TQkzD;Z7W+-OIqJ?k2rkKJ^;r)@E$y+wd4B{s|D24o3cTm~0qcoWqKbJ(Lrp6^O?bBPdE zZyVi|7dd$$`X+Tn#7348B77m#>=syf zu|UBkqMyHU&tSDN)uJPAkpey!LHHjFc$~qeV;n&^*sW(R4^bX=TeRW3@Du-xbz$2J zX90z0yf300oY!YtMD6dzBSVCb5HiqK8{kLl{*q+)ky0c(?j(6m({A@v`F~%wCBGmy zm55Bj^0Ns`CT)h|w`;2nwv__A<(_B=c7u2F`m3+hPFjK=v4w&brifZ*+Q8O?&GZ^f z#Ps7mW5mBcc*xnWH!2asonE3KRcq)8qg@B=z9Qd45PTD`D; zMe13DScj&*#r(HZZDVfy&K={aN6I2fbRI5c1NPx3mP_h+KWJHt_Ou5IDtpmR6A=jM zrTMZVPZOdk@8~k8$dhPx*15P-DmGnw)StP~+U|TzG$>o^LYF6lg{t~fi>>G2VX&|x=?9E`p7xILKp^k3N-Gdpv3~K!cF~2FZ{bc@8FK^f(PuBm+1duLH zdksx8r8f~FRx7KvF3e7t{I2S|?%_kM3IES-t`8>}X%q>HKLe}*!8@WpwfpaDgX^sV zhiSgrh4*)Hd1X=bG`J&CW|V$ex2?3)@RxnBxp-dM%EOJv&rA-Z{2=_Yb+jb`aG(qS zuLKAO1+l)l4@5G+(|T0~laM`X(>JSSE}{$c+HX}s?|+KA4}P_3^OFo!if^Qra;iNo zb1&R=Hk@r)`x26FcIKv@5b6V z0?bdLAY&n)Do8`mm>6dPb2}fM-uI4_oom4-6UiHd)Mm4O*$y4nr_(V-0eR9R6vH*rY zRB^x`c^CmlSFI2N*B33>F-eCA4Dy3ZJO=IF#t&pSz6t6J6NPS%CVlpd_`j7FYnvTR zM0}ug#JsSaWOzw6PG0u0bwDbtyJ=4^#p?6IV+k%f4u{#MlR5q>hIK)PC%ZFmyp9zO znhUI-rO@!KE=MDy9GXut{i^bO)|Gw|P@pN}^yZ9xuo@Du>?W&5mgS#)KX)Sx} z8sb;lx?e8c2R;T@yFE%!fB*b%F>8L0qYyd+ZH{zP(96n(u)Vb z6T?tRO>z7U23o9ARD)U-R&}y`r%wK&Mc-qT0%Fv3*{cqQQg0#b^(HV<=x|^m(Z2CI zt#{qb^j(qJI`=Uk%ZYN0Fb{evYGcjD%h*$>v%hrhD>hW{0+^+|Jjx+bg@FG9jzax7 zq>y%1WPt%;zapHAncX9^)-fRAGk<*Czc}~!G2IHMP?cwhG&45cf66bXDnD)U^4i5b zywGEX@fJpoYL+$#WkrVlLS1U(N$umbD2q@Lzl!{q7pRTR^MUvmPtd3Igr>m%n6U(W zy881zDp6rnBGNv@0@tRBI8Wod>ct8k;Dnkux{m2tc|eI=@*(884j5fJ4Gn7+%WH#A zoqkKDGTh76LW^UHKRZyQ4S~yn#IuN+f0Q`4827*1=&lw$D5vyA<>o~m*>FN~arNhO zwvx!k(L9GEOr2*oGcI$N=OdeCJ$#GYT$@d)_670hk4}+@x5n({bItky13J=0N;*YPqeC@%vJduWUa+M;GQNi^ z4l?KRx9qR}d^6DxZ3us7^|%B{Y^BQKdQy3G9BYEsG_B{Ap;`J)$qd$BL*91Ejs6E^ z^>b_85V*e6#&ICfdj7&!C1#EMLn&sc+z0u0WINq5*dmgOPh5`!L_uCrJf1pWgTn=yGKIW9{Y_0c_T`lo2WR z-wYNVm&GWRT2Wm!_eYE71^n&)|7%!B9E)pwqfs*%TQmen@kAC;WdGM_$ys0=sdQ4i zqI1?KxI5((;!0qSf+cS;@#T4shb5qur`*TmOHQX{(uA(x$rTSt82Jn^r68Lh0Bx6R zpbcX}*F(hp?7Z7skW2NhtEctP)|SNHxjhK}nN&u$E7Z;Ysi8%dY(#Lyk7a&7-Xe~F 
zzPn#xOi$)Bp1X!F=lr!4?$L$SZw2df79{`A`p3}VpAxTV_Ce48|EB^gvgy&8AD(3J z?Sql?ryM$_I}qJAkya-Ds@Hve!Z`++LN@F@BmD6AY#PV4Pq$)!AobQ0&1>5K9F{Mo zpD(tO0h}G@pQR{nk4l}?WI`oiKJ~c?>t=u+Jgt~ts4B_7#P2>*xbJmv59L7~xS5P0 z96eu}0J<(Ha>93z6eH2N3brC_}}+ajZ=I=zj;mai0WXysu0ts>j&{ z37EZ;^^LT93eR2g@y_X-Ud>c$_^|9{?6{>^D8j^N3gmD9O|iaD_WsM@B=ei@5i>2( ziej5Etpn}QK+dDk883;;=_)}|ukUZePRAU9;4<}mb+D9JT;~?B+IRiF&$#-({V*PU zzT}$nP4{ylL*QTPjYX_Ch1bfpqRZ5FhH`H7-vfXFbAtCQbb2+x7Eel+C6kVV3#RgF zytD2rACVvKM-E3=OH<~TUfu^#x{8^R9wj+0L+Q6%t7aoRp`iF^&vm1oqaT(iq%F6$f~`d`>AY6*obAg?*C`^k#Wc7a~Hbv3V)QzyEP*U z6OZC`RHBzwOz0X0R>hTr=t%?lv$5 zW>Fyn*iH`BA(xEH2XJhjxy-x{q+SaBbh_)xPmc)?is7kI@4fsS>kHx&`9s6$WU(Ep zgsBgA9fEW4`5N3zu<-a)LL9vZr(5=uGHjdT{r&{#EHT}-3GV?kBTWKogZnVQXTb_P zt55XX-U7%xl+7w!B*Oj)+x{8MH~ z;i@32?Pq%n^=o-dB= z5K<&WBwH6bv>nh_+inKOKbdF>G3<^j$le8?*E(Mf6;m7^@`Olcze%rh%5{IctAguc zFGv4X3JZxg{6>^FlW(DVpyLmQ`}6iYO<(+=Y1m5s^Pbcg!esk7LAg73Qwd`3klpeH z3nhT*amkvakgzQdzUPJ3tARZ(oAoC&LyydZu56rveQt0m_=x1R%kShjQC+h*rS7lq zY}?W;B+~F~H?SDL$Aq`G=a;+wFjeyxwmb@Z3rkxlx!1l(v;v^vgdrj!0FW0MRP3tI z=|D(|)HkqM82=a<57n-B%5Y|L_fE}MV%wo^9^{RfF*EHUh+gtm&0182sRpO|3#`kA>Q@|i^9!!s$fTLn& zl$Bt24G}dYj#KabnY(}J>pPC3O8P;`J{2hm*>q6^_Ks|&N)fQRfwg0xb&zGTWgFh` z%O~WCTj^jBt4Py;=MDU|HrW%}rTy7YKIn2BS$hgG zjcgYp*M}mHAG;3rH386KLCE~#$2XV;Cd?p~oa;Lv94Q7Lbx1oBkGMQ^X_2o2a(`JfG@YOVS_QQ^0vKfP) zN-`mUt&x5Ec8xV&b?_$&Ga)jxS;w4^MwFogte6{RCjN5YAkI~G07%uipK7`KI37{n zxx3?OT1pW)!;?(pv?hsI{KxuXr@3)`x;o{%KQ8E(dZs;v_g7?|5Pa@zh}FN$&-mqj zO%o@`A@S6NVXTuh*JyUq{%`ANer|F>liEVbOUCEBkR_;llZ@i(?5JO4EVkpNVDikP zM3NY7hrf!it<7|L7M6d$L`y#7&uShQMuKavyk$0odnn${NQxL#j54kV0Q^|!E|g@ra?A)5q7$COm0 z+Jr>syZTq%rJM7qXRV)($DV%O_uBZxrS4slQ~gbJVazW}(F`JK07cmHINJ3Tl}*aP z)IK6~C1%%1GF?v8FJXshm}b@|L+0ZIdrga1TF1GGCn?MtNg~1aiaG1t1AQNtTBq2o zfwEg9HpR*DVz}yTPzVVnAhjl&SU|q`KygK z0J8r_x_qY0eIh_4yLX+DyGBpc(5go~`3X|C3_ybkinQ9C(Z=EW6<=>tL%d*Cjqf?t z^@k*$9P6VaL*D1VlEKIPufa$=DMLa}sQ0q)THi##GAomGJnSzOSUfh{U(6vLEES`z 
z#a7~#hk3^&=YdUpx9j9glRBHbiyVQD)<5MiFz0}3!h@FOKp=;5;J-&-eoUy}Bj7fKI5oQQPV%Sh^8ee2!tSM5u5k1!I{+ABESu}Wse z7zH7A*0Oo}uF)h2)z9&2O5tD%E~HciX3e+57`8fGHE*#FCXSoPVu09zkmZ--yV zoWm0|2Q{>ZTSa&R9uyq4UhM&|*`S><753o~&mIK(Z~!A`@7}Eu-NHJS2CH8*=GI%M zwsKfaKKx{0G=ZSEahZ*E-+V}cHed6eXgBy4x`I==VA*VcFqJ+q!%^*kCsoHZ`u3+1 zhx_#4fy%!>B9f45BAsZV)V^v(r$p;P1t z=yvw<+J*BS7U4L{ncvMLFi&+fH;whIIdQuP%m0XVx{#Y)=ltZ*r6iBHE-4DIGH@n< zvW6ErJ%jaRJ{t!%O~$OyaYApSk1G<%{KCFXeXZpzUNG0-to7YIFms!H<{onq5HGIj z@`ddr+i-=%6US~5bN$ABK+DsFVFLhu;hQG&A*xMc3|o^#Ixgo!j^q zll2F6pxfWKcH7=2(nQT)&&;u5b|8WH*ifrjq~2>@2Cb_Ja=z4A`PBErcX|q|uan*Y zW^xrzk)i#K7W2E1D(oJEkCsub|2+a>kUI|t_V?9d%(B<4&l@iOO~WA$myNc;j$y{p zaPorUnR11B`kIx=+~*R0U9rE9jKFL|{z`pC3ra)Q+qc_ZPCu>sSmNQn&2`vrW~-)w*!pjx1T1X+Ivc_t)T?C2?6z zL*+bvsnN$hN^|^>?Oh1!$cSh*!OJh2yY@8O?-RBBg_}XC;!Ns&+@oxfF3SW}MZs&l z2f__LhqQWX+LXTN5Rk5ZT?(CmV(Y)e;b;&|fjjY;!ZzOg<;#xF7h#qmzUXo*s#;>j zJFO+STjCOCFhp>RW`w|r1^n^j&LDJhbeP{}y$>e=q=i64_J3&J1PM`VoxuA`i)wa? z)b2QJ9bifP7*JB0A|E+43g93diUL9be{LtXZb0HTbY)p$PjpOdAT6kLx*5Po2|mxT%^HyrM+azb7HVK)B|I0g1y)r@^Rn3kPFs+=~OD zw;&jW;Q&P^hni8BILrD(_%PfA&Yh6j;m>ZGzX3NE zB{EIXoA~LM>*t6%mt;u24GzEJRpVNcp9Y`mn@;}Re$o$RlDMc$8tV;h*Wfw7|_kX+5IefPt=4N)sT4_1(5>I@U~ydIG6R(7Yh z{ocMk(cYyAgn^eOESg;23f|!y)Vq3b6ss^KW*}%LE##iaDBCGYJEN%*XEF zo76K8Y8z#pYI}Z{VW*Z$#ALU@F9h_Iphj9bcO!H~gO%F8?IN79QtocAjdj?7rT`Qp zv+Po{ngCHN-onBwS()dblfc$%GIQOS{7}!;*o!u9O_XZBDajK&O^v4aaWM-QB=nm$ zZ_EnA$xVMgn0MqOrA0Xl?8p+cw$B@GdiTI|u>ak6ZiLSo>zE~Z=dt#GOtVQznFD9} zpps;p4tzgc1q9_FKIRjbr?`m4cKX7z$>7ZHV}ZVBo#0MZnXA$Gvk+fjqJeHm=uV=i z+Z=E}fePZ*eHz#^dFZvQUO)}@7;i#8)xXg^FWV<+y7?Au`uh#CG@0K`!b9EUvOwy1 z54Q`9E95c@iWnBnTE7bx|9gceiem));~HJr_mwm#K5$z?@uLRZEe-B778OKG(b4EM zVF<;+<7tOOyBa+jMp44dQE%~25c${TBaI6AN^H3Yq@o4%JnaUSCHUj-&yzG9ZpGyO zW@wnBw(nO7gSj((;K+$ekT2nYz3y8L2mqf6>PO)3UWjaL++d^}@$0Vmz|o|;O7@A* zW}BhFj92Y^{l^LI_EH$fpcU5S!U^=e%U0TYjYBUKO0 zafWbpXQ~?a`z5VgI?PAB66O&>V|g{|E?XxD@|QzuN}GJ>8Q&@L{noC>82wDphfM*X zS2Finfs2Wdzxkh`MQFIN`@BQN!7!_}uXFe(z}!3e$8Yk#36>S#cMR`XMeT}b@|&$c 
z%d_sOzofggZ2nA%eeJ6HZ0n|x^T7XW@7lwm+}i#_Tf3-qKv9}Ph>$ahp`^$u5sKs# zNzRcNLv1^fLy<#BPN4&BLWrrIQbI{catOWE&C8t#-3egnH@zbAT^m3%vD{?< z2k&z|X@<<`mp6`rEk!h9L*(8y*LH>WWKr`Dc^2BuCDnbo2ON*HLXc6qcv9ABvlWAj z1HiK~U4f*6ps)KvZk%w7HHjx~5E_&$KPu+2z4_9}cP8Xdd2Ai`zC+EFo^e07Fx1oz z3<_kG3tiwquqkj-ZWgTx4O$j<^q_gyn+I0`t-jj&Rs6dfu^>@nR~pp-b|l4U4A{wE zX%x7rlKaM$H~H0yvT*PK7o4G^xv#H1ujyL#I8N*yK+4ntrhQ9|q3>nL@c(%f+GRa&9b%;Bo5lgm$>wi{r(Am>!%bu}^Vk1)x8)qyDcpAhew1wK<{ah_twpiirRC6IH3XPGFr&5so(Se0YHtTf_Ye1XC|58Knutzhs~uIc~nT)o9pjb?9ytHbvk)&p-uXID->LpAnk z0dVBRl*V78{Rt#t%ap|K8INAfTF9U-}B-BhGAQ6HhN7hncQ{ zLu+hr$hFR_Ad4!gqrQ`(tSi?#;qdC#7F#q}9eK3vCP(Sk*1c|`U5C=xH6c3(pz+e+ zu&a~r(-L{TCPcMshw%#8A6rid#eVF0oQ+|pmcz<`h1JD4e1e2m7mttdoCA14io)<| zK(sOBj$GI_x!*syf7oN&;|6ixb^Jxr$OY3Sw>hKGIe-mN{P+uF z!4LFjzE8RiWY0^ySY7>5EUAD`Z_L)o3jEMuqr8Ver6E6mQF_O!<^P#54Dj)oD}!x1^# zi4g?DiNdfuoG|p55IWC(u`B?7S82$Kf!ykuGk~~hUx#6)c%!I$%IEYu?8}Ny!uDUc zQdZ9Lfn)r>iys;BC88Jxq1^0P@Owcua=>JqaxEy(s@ru$Bf#EIN>Irkg7D@KEx%z{ zHs1Df$`w(s!3tOvl8S7#J?j$Q2Rt`Wx0{>2;(QrOfjA}%Tk$z|wDhnui4Ey7(t%|j1GrL+r{WefT%1$fzMj&3dR~%Sy9$uT= z&MGaY4hO;m4#{K$j=Z2fIW3ZR#u=FQG3+QloW@YIZX*vM`jLg#=m7|!|2S>(ro`te z4BO3%8qR(m(x!}#90T5D@7{=90sDd%R`zZY@3NN^vN>}|yHZGV^UPj_e%+25yl?XB0ZFpUS(M!4 z2d`nZi9`(KbQ}0Q0jXe?+$BdmG0kftA5PeR3B4!}inaOjFUD2w; zsr^6`lNEcVCXlpipE&P;>H*L1{y+pts)YUlWCMn6N7+RVa{!w}Qc#H36FA{ivg8pL+Y3T|E@y!*4jc#9e9?iW6$jQy83Lb~||yDowyU=ouRzSFlqO`1@VX?oE|Pc(6tBNSwLdE#Uoz z`wY1Ev1g|E=zRpk0h%;Bq84ug+u>IMZ^+cS^dK9Cs znruS8?$6VtTW)a8d)C$DTIS3hPh`i8x1z4^nN{5$tyZH*Q+bVwI+0OZ{+8gOt76(P ze_zw?TWXzRCi2+*T4#o9BM*L2l@14;dSI2siWyhO#!HPWZ0{E%w?;lBXssPkEC%fZ$pwzX{hOe#5iR zWG(|2A=I!RU%eE9{6fLX4P+H{{=4NIArDX4^6Ds%kP4SY+!^ zsG`j?o~%#<$z_&oF3+{XQ^zz$<64m-vhSKcA8htknUKN=7$!0vu7t%J@Wt3RcE|w= zn_1=c43vt$D?eEMx*AZC-M1pL z9(?0JcMz5*4m32L7ARj*wK-6Oux;eyc_m9L580er*w1ra`@~wpUQDYXPOxxCj}olqW&}nm20Wq01SSw++@G# zK0+y_uU_*&{vyS7Z&H7kIn&OD1%JaeqBm*bq4WL#a(Y{5DQ5@%)3NNe<$bHpDPmY6 zA6o0|J~TEv^%=BciXI)qn~uCrksQ7NXBp2(d+K^?z#x|hfQWLT_FaDi!1~nHu_y!P zU~T!9x52Bb98nU!iIZBI%L8?f@GZFpesS 
zfQuVpbtX<*Bi#V08J9PeYa;@^64=)3F!aVFwbDVaQ~6Q!ozP%yU0abM(kZ(H5K(I+ zD1E2#CN_%(C3AFCoza@+_8@o6J5mPQrc|*+-S%uRs*_o@LHAGbbS;q;VTsfdEmuI8vGLV z?mc@U-cHAfPrHz-zS%E4lfwAB1FQ^!&)`VZnFt;*(tqM6&4Ku@YDne zw^G4EI9Uo~g1pn%ols<#1-fx!X^Z#Ol2xxA8YnN*@}s7jsI#* z3OP}uRd58Gk#}7W`Uiw6)rCKa>4Pkr-$?q?=5YK6>!ucYKQtU%uNZdxun@oDAxTA_ z@ByP@Q0c&#hGIm;`q{4mu^Vh&^O9n1d};0N%7@6x@DN1k!$bK4o+!Cyo3|TS#g()+ zCLQ(efq0E}IFF8SG3!{GU=Ev|!?0(rcmoz7F!zXV>y+a$9iiRkSjw*#*Sk<m9%PzXtXs2;o$0Fev1%FFNk~3bGBF#OgD`D=3^A zDL$jVPJToZ2~@>aG)&a;3PqXLdCw!h24-zk)dK)eRG3h=1g7<1< zzxTt-4asxa0{((PG4FUg$p{$T;PlsHyOX2^tLZA#j8oO^C8HnlgSXgy{#U&`=eo;V zU-yA!KpRNDz#jqxW(RvobpW!>lZ3rtD&%44ZH3?gxEj@I?r)VPPxKj5e+XB%;9yIye*I zY|6SGm|P~WrDRCt8H{f+(+F&yEb(4Z3@e2*>T2vJAPHpqLtNuWo*EE-hcy%;b76r; ze>FFv?GL+&4+#hA3r$CSU?K1gUc-<6T7y^ezqkKKJrk2z;ZC*me&?WSJ$_>+x5Gr+ zJ)wagg#d3$V6K9roD!OZ+iKdUnH*;$k_0rR+WhCYFYv?vT=6lf`gE%crZ*G zsoa=&Mm`tUlPq}29()pT%2OT%ABKt`^B;!Y-952*SqH!DB476NvJl!kvAasKiN6wSMgpM^O?q7GQ?Q@|9l$K0K;b6mRD((Z!mc1D^+6Hkw|18X{6LEW9a3X3twq7LQJ%pQMD5B z5&VySrr$ATC{5-@T{*K>8dd!?uKhXdAH(xt5Yk^f@Anz~82&7ZSd;8-Wq*FRWP7R1b!h%1erD84gBqwTx zu^Qef&?q+a#pP$yjU~jhGBW8JjJm&o5RLMErP7RIJtu-c}m2MMJ4E)wlWa==DvpUxqRgyV2w^H=DZ{s`M;_{$yT*6AGV2+x3{XVC%d}$@2ELU^# zaPp1VR2GX>Un6XjAjNCqrbwPoAdA{WGG}eF&kc4`2#j&58}zY&PP)TkwQtlu4dbEb zO}Gy8bD8;5=Wu9C2%W_!h{_US^xCjKjOUoq!$Vtznec%w<8H!KW}9R#_Sa` zgId5EZ-jH>5Axg(;^zTA^(=0IY#0{Y3brT{1__)dG~rv>Hd|boGQ)06o8h-rYMt?I zqJp9}54L)#Hj|%vg=x6)%#1aSWoZls7=ZDb7<4g69(<6;x;%Sf|#rYr03FuV8ThI&Pqqzp{#qik%xvB6kF~CWPe|e3oJn;1^k_`0m q!c{}~qKdi$KL5XqIdzFX#v-a;dv3qpe|i$NSNDLScK*Iom;MJPQ~LY> literal 0 HcmV?d00001 diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/fpga_reg/sample.json b/DirectProgramming/DPC++FPGA/Tutorials/Features/fpga_reg/sample.json new file mode 100755 index 0000000000..57573588f4 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/fpga_reg/sample.json @@ -0,0 +1,34 @@ +{ + "guid": "D661A5C2-5FE0-40F2-BFE7-70E3BA60F088", + "name": "Explicit Pipeline Register Insertion with fpga_reg", + 
"categories": ["Toolkit/Intel® oneAPI Base Toolkit/FPGA/Tutorials"], + "description": "FPGA advanced tutorial demonstrating how to apply the DPC++ extension intel::fpga_reg", + "toolchain": ["dpcpp"], + "os": ["linux"], + "targetDevice": ["FPGA"], + "builder": ["cmake"], + "languages": [{"cpp":{}}], + "ciTests": { + "linux": [ + { + "id": "fpga_emu", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make fpga_emu", + "./fpga_reg.fpga_emu" + ] + }, + { + "id": "report", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make report" + ] + } + ] + } +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/fpga_reg/src/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/fpga_reg/src/CMakeLists.txt new file mode 100755 index 0000000000..2880b9dcf9 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/fpga_reg/src/CMakeLists.txt @@ -0,0 +1,111 @@ +set(SOURCE_FILE fpga_reg.cpp) +set(TARGET_NAME fpga_reg) +set(TARGET_NAME_REG fpga_reg_registered) +set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu) +set(FPGA_TARGET ${TARGET_NAME}.fpga) +set(FPGA_TARGET_REG ${TARGET_NAME_REG}.fpga) + +# Intel supported FPGA Boards and their names +set(A10_PAC_BOARD_NAME "intel_a10gx_pac:pac_a10") +set(S10_PAC_BOARD_NAME "intel_s10sx_pac:pac_s10") + +# Assume target is the Intel(R) PAC with Intel Arria(R) 10 GX FPGA +SET(_FPGA_BOARD ${A10_PAC_BOARD_NAME}) + +# Check if target is the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA +IF (NOT DEFINED FPGA_BOARD) + MESSAGE(STATUS "\tFPGA_BOARD was not specified. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. 
Please refer to the README for more information on how to run the design on the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${A10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${S10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Stratix(R) 10 SX FPGA.") + SET(_FPGA_BOARD ${S10_PAC_BOARD_NAME}) + +ELSE() + MESSAGE(STATUS "\tAn invalid board name was passed in using the FPGA_BOARD flag. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. Please refer to the README for the list of valid board names.") +ENDIF() + +set(HARDWARE_COMPILE_FLAGS "-fintelfpga") + +# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation +set(HARDWARE_LINK_FLAGS "-fintelfpga -Xshardware -Xsboard=${_FPGA_BOARD} ${USER_HARDWARE_FLAGS}") + +set(EMULATOR_COMPILE_FLAGS "-fintelfpga -DFPGA_EMULATOR") +set(EMULATOR_LINK_FLAGS "-fintelfpga") + +# fpga emulator +if(WIN32) + set(WIN_EMULATOR_TARGET ${EMULATOR_TARGET}.exe) + add_custom_target(fpga_emu DEPENDS ${WIN_EMULATOR_TARGET}) + separate_arguments(WIN_EMULATOR_COMPILE_FLAGS WINDOWS_COMMAND "${EMULATOR_COMPILE_FLAGS}") + add_custom_command(OUTPUT ${WIN_EMULATOR_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} ${WIN_EMULATOR_COMPILE_FLAGS} /GX ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${WIN_EMULATOR_TARGET} + DEPENDS ${SOURCE_FILE}) + +else() + add_executable(${EMULATOR_TARGET} ${SOURCE_FILE}) + add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS ${EMULATOR_COMPILE_FLAGS}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS ${EMULATOR_LINK_FLAGS}) +endif() + +# fpga +if(WIN32) + add_custom_target(fpga + COMMAND 
echo "FPGA hardware flow is not supported in Windows") +else() + add_executable(${FPGA_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILE}) + add_executable(${FPGA_TARGET_REG} EXCLUDE_FROM_ALL ${SOURCE_FILE}) + add_custom_target(fpga DEPENDS ${FPGA_TARGET} ${FPGA_TARGET_REG}) + + set_target_properties(${FPGA_TARGET} PROPERTIES COMPILE_FLAGS ${HARDWARE_COMPILE_FLAGS}) + set_target_properties(${FPGA_TARGET} PROPERTIES LINK_FLAGS ${HARDWARE_LINK_FLAGS}) + + set_target_properties(${FPGA_TARGET_REG} PROPERTIES COMPILE_FLAGS "${HARDWARE_COMPILE_FLAGS} -DUSE_FPGA_REG") + set_target_properties(${FPGA_TARGET_REG} PROPERTIES LINK_FLAGS ${HARDWARE_LINK_FLAGS}) +endif() + +# report +if(WIN32) + set(REPORT ${TARGET_NAME}_report.a) + set(REPORT_REG ${TARGET_NAME_REG}_report.a) + + add_custom_target(report DEPENDS ${REPORT} ${REPORT_REG}) + + separate_arguments(HARDWARE_LINK_FLAGS_LIST WINDOWS_COMMAND "${HARDWARE_LINK_FLAGS}") + + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} ${CMAKE_BINARY_DIR}/${TARGET_NAME}/${SOURCE_FILE} COPYONLY) + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} ${CMAKE_BINARY_DIR}/${TARGET_NAME_REG}/${SOURCE_FILE} COPYONLY) + + add_custom_command(OUTPUT ${REPORT} + COMMAND ${CMAKE_CXX_COMPILER} /EHsc ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link ${CMAKE_BINARY_DIR}/${TARGET_NAME}/${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${REPORT} + DEPENDS ${SOURCE_FILE}) + + add_custom_command(OUTPUT ${REPORT_REG} + COMMAND ${CMAKE_CXX_COMPILER} /EHsc ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} -DUSE_FPGA_REG -fsycl-link ${CMAKE_BINARY_DIR}/${TARGET_NAME_REG}/${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${REPORT_REG} + DEPENDS ${SOURCE_FILE}) + +else() + set(REPORT ${TARGET_NAME}_report.a) + set(REPORT_REG ${TARGET_NAME_REG}_report.a) + + add_custom_target(report DEPENDS ${REPORT} ${REPORT_REG}) + + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} ${SOURCE_FILE} COPYONLY) + + separate_arguments(HARDWARE_LINK_FLAGS_LIST UNIX_COMMAND 
"${HARDWARE_LINK_FLAGS}") + add_custom_command(OUTPUT ${REPORT} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link ${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${REPORT} + DEPENDS ${SOURCE_FILE}) + + add_custom_command(OUTPUT ${REPORT_REG} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} -DUSE_FPGA_REG -fsycl-link ${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${REPORT_REG} + DEPENDS ${SOURCE_FILE}) +endif() + +# run +add_custom_target(run + COMMAND ../${TARGET_NAME}.fpga_emu + DEPENDS ${TARGET_NAME}.fpga_emu) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/fpga_reg/src/fpga_reg.cpp b/DirectProgramming/DPC++FPGA/Tutorials/Features/fpga_reg/src/fpga_reg.cpp new file mode 100755 index 0000000000..c15255631b --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/fpga_reg/src/fpga_reg.cpp @@ -0,0 +1,216 @@ +//============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +#include +#include +#include +#include +#include +#include "dpc_common.hpp" + +using namespace sycl; +using namespace std; + +// Artificial coefficient and offset data for our math function +constexpr size_t kSize = 64; +constexpr std::array kCoeff = { + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}; +constexpr std::array kOffset = { + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}; + +// The function our kernel will compute +// The "golden result" will be computed on the host to check 
the kernel result. +vector GoldenResult(vector vec) { + + // The coefficients will be modified with each iteration of the outer loop. + std::array coeff = kCoeff; + + for (int &val : vec) { + // Do some arithmetic + int acc = 0; + for (size_t i = 0; i < kSize; i++) { + acc += coeff[i] * (val + kOffset[i]); + } + + // Update coeff by rotating the values of the array + int tmp = coeff[0]; + for (size_t i = 0; i < kSize - 1; i++) { + coeff[i] = coeff[i + 1]; + } + coeff[kSize - 1] = tmp; + + // Result + val = acc; + } + + return vec; +} + +// Forward declaration of the kernel name +// (This will become unnecessary in a future compiler version.) +class SimpleMath; + +void RunKernel(const device_selector &selector, + const std::vector &vec_a, + std::vector &vec_r) { + + size_t input_size = vec_a.size(); + + try { + queue q(selector, dpc_common::exception_handler, + property::queue::enable_profiling{}); + + buffer device_a(vec_a); + // Use verbose SYCL 1.2 syntax for the output buffer. + // (This will become unnecessary in a future compiler version.) + buffer device_r(vec_r.data(), input_size); + + event e = q.submit([&](handler &h) { + auto a = device_a.get_access(h); + auto r = device_r.get_access(h); + + // FPGA-optimized kernel + // Using kernel_args_restrict tells the compiler that the input + // and output buffers won't alias. + h.single_task([=]() [[intel::kernel_args_restrict]] { + + // Force the compiler to implement the coefficient array in FPGA + // pipeline registers rather than in on-chip memory. + [[intelfpga::register]] std::array coeff = kCoeff; + + // The compiler will pipeline the outer loop. + for (size_t i = 0; i < input_size; ++i) { + int acc = 0; + int val = a[i]; + + // Fully unroll the accumulator loop. + // All of the unrolled operations can be freely scheduled by the + // DPC++ compiler's FPGA backend as part of a common data pipeline. 
+ #pragma unroll + for (size_t j = 0; j < kSize; j++) { +#ifdef USE_FPGA_REG + // Use fpga_reg to insert a register between the copy of val used + // in each unrolled iteration. + val = intel::fpga_reg(val); + // Since val is held constant across the kSize unrolled iterations, + // the FPGA hardware structure of val's distribution changes from a + // kSize-way fanout (without fpga_reg) to a chain of of registers + // with intermediate tap offs. Refer to the diagram in the README. + + // Use fpga_reg to insert a register between each step in the acc + // adder chain. + acc = intel::fpga_reg(acc) + (coeff[j] * (val + kOffset[j])); + // This transforms a compiler-inferred adder tree into an adder + // chain, altering the structure of the pipeline. Refer to the + // diagram in the README. +#else + // Without fpga_reg, the compiler schedules the operations here + // according to its default optimization heuristics. + acc += (coeff[j] * (val + kOffset[j])); +#endif + } + + // Rotate the values of the coefficient array. + // The loop is fully unrolled. This is a cannonical code structure; + // the DPC++ compiler's FPGA backend infers a shift register here. + int tmp = coeff[0]; + #pragma unroll + for (size_t j = 0; j < kSize - 1; j++) { + coeff[j] = coeff[j + 1]; + } + coeff[kSize - 1] = tmp; + + // Result + r[i] = acc; + } + }); + }); + + // Measure kernel execution time + double start = e.get_profiling_info(); + double end = e.get_profiling_info(); + // Convert from nanoseconds to milliseconds. + double kernel_time = (end - start) * 1e-6; + + // Kernel consists of two nested loops with 3 operations in the innermost + // loop: 2 additions and 1 multiplication operation. 
+ size_t num_ops_per_kernel = input_size * kSize * 3; + cout << "Throughput for kernel with input size " << input_size + << " and coefficient array size " << kSize << ": "; + cout << std::fixed << std::setprecision(6) + << ((double)num_ops_per_kernel / kernel_time) / 1.0e6 << " GFlops\n"; + + } catch (sycl::exception const &e) { + // Catches exceptions in the host code + std::cout << "Caught a SYCL host exception:\n" << e.what() << "\n"; + + // Most likely the runtime couldn't find FPGA hardware! + if (e.get_cl_code() == CL_DEVICE_NOT_FOUND) { + std::cout << "If you are targeting an FPGA, please ensure that your " + "system has a correctly configured FPGA board.\n"; + std::cout << "If you are targeting the FPGA emulator, compile with " + "-DFPGA_EMULATOR.\n"; + } + std::terminate(); + } +} + +int main(int argc, char *argv[]) { + size_t input_size = 1e6; + + // Optional command line override of default input size + if (argc > 1) { + string option(argv[1]); + if (option == "-h" || option == "--help") { + cout << "Usage: \n \n\nFAILED\n"; + return 1; + } else { + input_size = stoi(option); + } + } + + // Initialize input vector + constexpr int max_val = 1<<10; // Conservative max to avoid integer overflow + vector vec_a(input_size); + for (size_t i = 0; i < input_size; i++) { + vec_a[i] = rand() % max_val; + } + // Kernel result vector + vector vec_r(input_size); + + // Run the kernel on either the FPGA emulator, or FPGA +#if defined(FPGA_EMULATOR) + intel::fpga_emulator_selector selector; +#else + intel::fpga_selector selector; +#endif + RunKernel(selector, vec_a, vec_r); + + // Test the results. 
+ vector golden_ref = GoldenResult(vec_a); + bool correct = true; + for (size_t i = 0; i < input_size; i++) { + if (vec_r[i] != golden_ref[i]) { + cout << "Found mismatch at " << i << ", " + << vec_r[i] << " != " << golden_ref[i] << "\n"; + correct = false; + } + } + + if (correct) { + cout << "PASSED: Results are correct.\n"; + } else { + cout << "FAILED: Results are incorrect.\n"; + return 1; + } + + return 0; +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_unroll/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_unroll/CMakeLists.txt new file mode 100755 index 0000000000..a94ffc91b3 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_unroll/CMakeLists.txt @@ -0,0 +1,11 @@ +set(CMAKE_CXX_COMPILER "dpcpp") + +cmake_minimum_required (VERSION 2.8) + +project(LoopUnroll) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +add_subdirectory (src) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_unroll/License.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_unroll/License.txt new file mode 100755 index 0000000000..e63c6e13dc --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_unroll/License.txt @@ -0,0 +1,7 @@ +Copyright Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_unroll/README.md b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_unroll/README.md new file mode 100755 index 0000000000..5c2528eeb0 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_unroll/README.md @@ -0,0 +1,188 @@ + +# Unrolling Loops +This FPGA tutorial demonstrates a simple example of unrolling loops to improve the throughput of a DPC++ FPGA program. + +***Documentation***: The [oneAPI DPC++ FPGA Optimization Guide](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide) provides comprehensive instructions for targeting FPGAs through DPC++. The [oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide) is a general resource for target-independent DPC++ programming. + +| Optimized for | Description +--- |--- +| OS | Linux* Ubuntu* 18.04 +| Hardware | Intel® Programmable Acceleration Card (PAC) with Intel Arria® 10 GX FPGA;
Intel® Programmable Acceleration Card (PAC) with Intel Stratix® 10 SX FPGA +| Software | Intel® oneAPI DPC++ Compiler (Beta)
Intel® FPGA Add-On for oneAPI Base Toolkit +| What you will learn | Basics of loop unrolling
How to unroll loops in your program
Determining the optimal unroll factor for your program +| Time to complete | 15 minutes + +_Notice: This code sample is not yet supported in Windows*_ + +## Purpose + +The loop unrolling mechanism is used to increase program parallelism by duplicating the compute logic within a loop. The number of times the loop logic is duplicated is called the *unroll factor*. Depending on whether the *unroll factor* is equal to the number of loop iterations or not, loop unroll methods can be categorized as *full-loop unrolling* and *partial-loop unrolling*. + +### Example: Full-Loop Unrolling +```c++ +// Before unrolling loop +#pragma unroll +for(i = 0 ; i < 5; i++){ + a[i] += 1; +} + +// Equivalent code after unrolling +// There is no longer any loop +a[0] += 1; +a[1] += 1; +a[2] += 1; +a[3] += 1; +a[4] += 1; +``` +A full unroll is a special case where the unroll factor is equal to the number of loop iterations. Here, the Intel® oneAPI DPC++ Compiler for FPGA instantiates five adders instead of one adder. + +### Example: Partial-Loop Unrolling + +```c++ +// Before unrolling loop +#pragma unroll 4 +for(i = 0 ; i < 20; i++){ + a[i] += 1; +} + +// Equivalent code after unrolling by a factor of 4 +// The resulting loop has five (20 / 4) iterations +for(i = 0 ; i < 5; i++){ + a[i * 4] += 1; + a[i * 4 + 1] += 1; + a[i * 4 + 2] += 1; + a[i * 4 + 3] += 1; +} +``` +Each loop iteration in the "equivalent code" contains four unrolled invocations of the original loop body. The Intel® oneAPI DPC++ Compiler (Beta) for FPGA instantiates four adders instead of one adder. Because there is no data dependency between iterations in the loop in this case, the compiler schedules all four adds in parallel. + +### Determining the optimal unroll factor +In an FPGA design, unrolling loops is a common strategy to directly trade off on-chip resources for increased throughput. When selecting the unroll factor for a specific loop, the intent is to improve throughput while minimizing resource utilization.
It is also important to be mindful of other throughput constraints in your system, such as memory bandwidth. + +### Tutorial design +This tutorial demonstrates this trade-off with a simple vector add kernel. The tutorial shows how increasing the unroll factor on a loop increases throughput... until another bottleneck is encountered. This example is constructed to run up against global memory bandwidth constraints. + +The memory bandwidth on an Intel® Programmable Acceleration Card with Intel Arria® 10 GX FPGA system is about 6 GB/s. The tutorial design will likely run at around 300 MHz. In this design, the FPGA design processes a new iteration every cycle in a pipeline-parallel fashion. The theoretical computation limit for 1 adder is: + +**GFlops**: 300 MHz \* 1 float = 0.3 GFlops + +**Computation Bandwidth**: 300 MHz \* 1 float * 4 Bytes = 1.2 GB/s + +You repeat this back-of-the-envelope calculation for different unroll factors: + +Unroll Factor | GFlops | Computation Bandwidth (GB/s) +------------- | ------------- | ----------------------- +1 | 0.3 | 1.2 +2 | 0.6 | 2.4 +4 | 1.2 | 4.8 +8 | 2.4 | 9.6 +16 | 4.8 | 19.2 + +On an Intel® Programmable Acceleration Card with Intel Arria® 10 GX FPGA, it is reasonable to predict that this program will become memory-bandwidth limited when the unroll factor grows from 4 to 8. Check this prediction by running the design following the instructions below. + + +## Key Concepts +* Basics of loop unrolling. +* How to unroll loops in your program. +* Determining the optimal unroll factor for your program. + +## License +This code sample is licensed under MIT license. + + +## Building the `loop_unroll` Tutorial + +### Include Files +The included header `dpc_common.hpp` is located at `%ONEAPI_ROOT%\dev-utilities\latest\include` on your development system.
+ +### Running Samples in DevCloud +If running a sample in the Intel DevCloud, remember that you must specify the compute node (fpga_compile or fpga_runtime) as well as whether to run in batch or interactive mode. For more information see the Intel® oneAPI Base Toolkit Get Started Guide ([https://devcloud.intel.com/oneapi/get-started/base-toolkit/](https://devcloud.intel.com/oneapi/get-started/base-toolkit/)). + +When compiling for FPGA hardware, it is recommended to increase the job timeout to 12h. + +### On a Linux* System + +1. Generate the `Makefile` by running `cmake`. + ``` + mkdir build + cd build + ``` + To compile for the Intel® PAC with Intel Arria® 10 GX FPGA, run `cmake` using the command: + ``` + cmake .. + ``` + Alternatively, to compile for the Intel® PAC with Intel Stratix® 10 SX FPGA, run `cmake` using the command: + + ``` + cmake .. -DFPGA_BOARD=intel_s10sx_pac:pac_s10 + ``` + +2. Compile the design through the generated `Makefile`. The following build targets are provided, matching the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + make fpga_emu + ``` + * Generate the optimization report: + ``` + make report + ``` + * Compile for FPGA hardware (longer compile time, targets FPGA device): + ``` + make fpga + ``` +3. (Optional) As the above hardware compile may take several hours to complete, an Intel® PAC with Intel Arria® 10 GX FPGA precompiled binary can be downloaded
here. + + + ### In Third-Party Integrated Development Environments (IDEs) + +You can compile and run this tutorial in the Eclipse* IDE (in Linux*). For instructions, refer to the following link: [Intel® oneAPI DPC++ FPGA Workflows on Third-Party IDEs](https://software.intel.com/en-us/articles/intel-oneapi-dpcpp-fpga-workflow-on-ide) + +## Examining the Reports +Locate `report.html` in the `loop_unroll_report.prj/reports/` or `loop_unroll_s10_pac_report.prj/reports/` directory. Open the report in any of Chrome*, Firefox*, Edge*, or Internet Explorer*. + +Navigate to the Area Report and compare the FPGA resource utilization of the kernels with unroll factors of 1, 2, 4, 8, and 16. In particular, check the number of DSP resources consumed. You should see the area grow roughly linearly with the unroll factor. + +You can also check the achieved system fMAX in order to verify the earlier calculations. + +## Running the Sample + + 1. Run the sample on the FPGA emulator (the kernel executes on the CPU): + ``` + ./loop_unroll.fpga_emu (Linux) + ``` +2. 
Run the sample on the FPGA device: + ``` + ./loop_unroll.fpga (Linux) + ``` + +### Example of Output +``` +Input Array Size: 67108864 +UnrollFactor 1 kernel time : 255.749 ms +Throughput for kernel with UnrollFactor 1: 0.262 GFlops +UnrollFactor 2 kernel time : 140.285 ms +Throughput for kernel with UnrollFactor 2: 0.478 GFlops +UnrollFactor 4 kernel time : 68.296 ms +Throughput for kernel with UnrollFactor 4: 0.983 GFlops +UnrollFactor 8 kernel time : 44.567 ms +Throughput for kernel with UnrollFactor 8: 1.506 GFlops +UnrollFactor 16 kernel time : 39.175 ms +Throughput for kernel with UnrollFactor 16: 1.713 GFlops +PASSED: The results are correct +``` + +### Discussion of Results +The following table summarizes the execution time (in ms), throughput (in GFlops), and number of DSPs used for unroll factors of 1, 2, 4, 8, and 16 for a default input array size of 64M floats (2 ^ 26 floats) on Intel® Programmable Acceleration Card with Intel® Arria® 10 GX FPGA: + +Unroll Factor | Kernel Time (ms) | Throughput (GFlops) | Num of DSPs +------------- | ------------- | -----------------------| ------- +1 | 242 | 0.277 | 1 +2 | 127 | 0.528 | 2 +4 | 63 | 1.065 | 4 +8 | 46 | 1.459 | 8 +16 | 44 | 1.525 | 16 + +Notice that when the unroll factor increases from 1 to 2 and from 2 to 4, the kernel execution time decreases by a factor of two. Correspondingly, the kernel throughput doubles. However, when the unroll factor is increased from 4 to 8 and from 8 to 16, the throughput no longer scales by a factor of two at each step. The design is now bound by memory bandwidth limitations instead of compute unit limitations even though the hardware is replicated. + +These performance differences will be apparent only when running on FPGA hardware. The emulator, while useful for verifying functionality, will generally not reflect differences in performance.
diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_unroll/sample.json b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_unroll/sample.json new file mode 100755 index 0000000000..3863df9d59 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_unroll/sample.json @@ -0,0 +1,34 @@ +{ + "guid": "2760C1B6-25E5-4280-9F8F-34CA8DDEDA7C", + "name": "Unrolling Loops", + "categories": ["Toolkit/Intel® oneAPI Base Toolkit/FPGA/Tutorials"], + "description": "FPGA tutorial design demonstrating the loop_unroll pragma", + "toolchain": ["dpcpp"], + "os": ["linux"], + "targetDevice": ["FPGA"], + "builder": ["cmake"], + "languages": [{"cpp":{}}], + "ciTests": { + "linux": [ + { + "id": "fpga_emu", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make fpga_emu", + "./loop_unroll.fpga_emu" + ] + }, + { + "id": "report", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make report" + ] + } + ] + } +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_unroll/src/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_unroll/src/CMakeLists.txt new file mode 100755 index 0000000000..3ca0487ff3 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_unroll/src/CMakeLists.txt @@ -0,0 +1,89 @@ +set(SOURCE_FILE loop_unroll.cpp) +set(TARGET_NAME loop_unroll) + +set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu) +set(FPGA_TARGET ${TARGET_NAME}.fpga) + +# Intel supported FPGA Boards and their names +set(A10_PAC_BOARD_NAME "intel_a10gx_pac:pac_a10") +set(S10_PAC_BOARD_NAME "intel_s10sx_pac:pac_s10") + +# Assume target is the Intel(R) PAC with Intel Arria(R) 10 GX FPGA +SET(_FPGA_BOARD ${A10_PAC_BOARD_NAME}) + +# Check if target is the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA +IF (NOT DEFINED FPGA_BOARD) + MESSAGE(STATUS "\tFPGA_BOARD was not specified. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. 
Please refer to the README for more information on how to run the design on the Intel(R) PAC with Intel Stratix(R) 10 SX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${A10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA.") + +ELSEIF(FPGA_BOARD STREQUAL ${S10_PAC_BOARD_NAME}) + MESSAGE(STATUS "\tConfiguring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Stratix(R) 10 SX FPGA.") + SET(_FPGA_BOARD ${S10_PAC_BOARD_NAME}) + +ELSE() + MESSAGE(STATUS "\tAn invalid board name was passed in using the FPGA_BOARD flag. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. Please refer to the README for the list of valid board names.") +ENDIF() + +set(HARDWARE_COMPILE_FLAGS "-fintelfpga") + +# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation +set(HARDWARE_LINK_FLAGS "-fintelfpga -Xshardware -Xsboard=${_FPGA_BOARD} ${USER_HARDWARE_FLAGS}") + +set(EMULATOR_COMPILE_FLAGS "-fintelfpga -DFPGA_EMULATOR") +set(EMULATOR_LINK_FLAGS "-fintelfpga") + +# fpga emulator +if(WIN32) + set(WIN_EMULATOR_TARGET ${EMULATOR_TARGET}.exe) + add_custom_target(fpga_emu DEPENDS ${WIN_EMULATOR_TARGET}) + separate_arguments(WIN_EMULATOR_COMPILE_FLAGS WINDOWS_COMMAND "${EMULATOR_COMPILE_FLAGS}") + add_custom_command(OUTPUT ${WIN_EMULATOR_TARGET} + COMMAND ${CMAKE_CXX_COMPILER} ${WIN_EMULATOR_COMPILE_FLAGS} /GX ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${WIN_EMULATOR_TARGET} + DEPENDS ${SOURCE_FILE}) +else() + add_executable(${EMULATOR_TARGET} ${SOURCE_FILE}) + add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS ${EMULATOR_COMPILE_FLAGS}) + set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS ${EMULATOR_LINK_FLAGS}) +endif() + + +# fpga +if(WIN32) + add_custom_target(fpga + COMMAND 
echo "FPGA hardware flow is not supported in Windows") +else() + add_executable(${FPGA_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILE}) + add_custom_target(fpga DEPENDS ${FPGA_TARGET}) + set_target_properties(${FPGA_TARGET} PROPERTIES COMPILE_FLAGS ${HARDWARE_COMPILE_FLAGS}) + set_target_properties(${FPGA_TARGET} PROPERTIES LINK_FLAGS ${HARDWARE_LINK_FLAGS}) +endif() + +# generate report +if(WIN32) + set(DEVICE_OBJ_FILE ${TARGET_NAME}_report.a) + add_custom_target(report DEPENDS ${DEVICE_OBJ_FILE}) + + separate_arguments(HARDWARE_LINK_FLAGS_LIST WINDOWS_COMMAND "${HARDWARE_LINK_FLAGS}") + add_custom_command(OUTPUT ${DEVICE_OBJ_FILE} + COMMAND ${CMAKE_CXX_COMPILER} /EHsc ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${DEVICE_OBJ_FILE} + DEPENDS ${SOURCE_FILE}) + +else() + set(DEVICE_OBJ_FILE ${TARGET_NAME}_report.a) + add_custom_target(report DEPENDS ${DEVICE_OBJ_FILE}) + + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_FILE} ${SOURCE_FILE} COPYONLY) + + separate_arguments(HARDWARE_LINK_FLAGS_LIST UNIX_COMMAND "${HARDWARE_LINK_FLAGS}") + add_custom_command(OUTPUT ${DEVICE_OBJ_FILE} + COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} ${HARDWARE_LINK_FLAGS_LIST} -fsycl-link ${SOURCE_FILE} -o ${CMAKE_BINARY_DIR}/${DEVICE_OBJ_FILE} + DEPENDS ${SOURCE_FILE}) +endif() + +# run +add_custom_target(run + COMMAND ../${TARGET_NAME}.fpga_emu + DEPENDS ${TARGET_NAME}.fpga_emu) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_unroll/src/loop_unroll.cpp b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_unroll/src/loop_unroll.cpp new file mode 100755 index 0000000000..bab7954bea --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/loop_unroll/src/loop_unroll.cpp @@ -0,0 +1,138 @@ +//============================================================== +// Copyright Intel Corporation +// +// SPDX-License-Identifier: MIT +// 
============================================================= +#include +#include +#include +#include +#include +#include "dpc_common.hpp" + +using namespace sycl; + +template class VAdd; + +// This function instantiates the vector add kernel, which contains +// a loop that adds up the two summand arrays and stores the result +// into sum. This loop will be unrolled by the specified unroll_factor. +template +void VecAdd(const std::vector &summands1, + const std::vector &summands2, std::vector &sum, + size_t array_size) { + + +#if defined(FPGA_EMULATOR) + intel::fpga_emulator_selector device_selector; +#else + intel::fpga_selector device_selector; +#endif + + try { + queue q(device_selector, dpc_common::exception_handler, + property::queue::enable_profiling{}); + + buffer buffer_summands1(summands1); + buffer buffer_summands2(summands2); + // Use verbose SYCL 1.2 syntax for the output buffer. + // (This will become unnecessary in a future compiler version.) + buffer buffer_sum(sum.data(), array_size); + + event e = q.submit([&](handler &h) { + auto acc_summands1 = buffer_summands1.get_access(h); + auto acc_summands2 = buffer_summands2.get_access(h); + auto acc_sum = buffer_sum.get_access(h); + + h.single_task>([=]() + [[intel::kernel_args_restrict]] { + // Unroll the loop fully or partially, depending on unroll_factor + #pragma unroll unroll_factor + for (size_t i = 0; i < array_size; i++) { + acc_sum[i] = acc_summands1[i] + acc_summands2[i]; + } + }); + }); + + double start = e.get_profiling_info(); + double end = e.get_profiling_info(); + // convert from nanoseconds to ms + double kernel_time = (double)(end - start) * 1e-6; + + std::cout << "unroll_factor " << unroll_factor + << " kernel time : " << kernel_time << " ms\n"; + std::cout << "Throughput for kernel with unroll_factor " << unroll_factor + << ": "; + std::cout << std::fixed << std::setprecision(3) + << ((double)array_size / kernel_time) / 1e6f << " GFlops\n"; + + } catch (sycl::exception const &e) { + // 
Catches exceptions in the host code + std::cout << "Caught a SYCL host exception:\n" << e.what() << "\n"; + + // Most likely the runtime couldn't find FPGA hardware! + if (e.get_cl_code() == CL_DEVICE_NOT_FOUND) { + std::cout << "If you are targeting an FPGA, please ensure that your " + "system has a correctly configured FPGA board.\n"; + std::cout << "If you are targeting the FPGA emulator, compile with " + "-DFPGA_EMULATOR.\n"; + } + std::terminate(); + } +} + +int main(int argc, char *argv[]) { + size_t array_size = 1 << 26; + + if (argc > 1) { + std::string option(argv[1]); + if (option == "-h" || option == "--help") { + std::cout << "Usage: \n \n\nFAILED\n"; + return 1; + } else { + array_size = std::stoi(option); + } + } + + std::vector summands1(array_size); + std::vector summands2(array_size); + + std::vector sum_unrollx1(array_size); + std::vector sum_unrollx2(array_size); + std::vector sum_unrollx4(array_size); + std::vector sum_unrollx8(array_size); + std::vector sum_unrollx16(array_size); + + // Initialize the two summand arrays (arrays to be added to each other) to + // 1:N and N:1, so that the sum of all elements is N + 1 + for (size_t i = 0; i < array_size; i++) { + summands1[i] = static_cast(i + 1); + summands2[i] = static_cast(array_size - i); + } + + std::cout << "Input Array Size: " << array_size << "\n"; + + // Instantiate VecAdd kernel with different unroll factors: 1, 2, 4, 8, 16 + // The VecAdd kernel contains a loop that adds up the two summand arrays. + // This loop will be unrolled by the specified unroll factor. + // The sum array is expected to be identical, regardless of the unroll factor. 
+ VecAdd<1>(summands1, summands2, sum_unrollx1, array_size); + VecAdd<2>(summands1, summands2, sum_unrollx2, array_size); + VecAdd<4>(summands1, summands2, sum_unrollx4, array_size); + VecAdd<8>(summands1, summands2, sum_unrollx8, array_size); + VecAdd<16>(summands1, summands2, sum_unrollx16, array_size); + + // Verify that the output data is the same for every unroll factor + for (size_t i = 0; i < array_size; i++) { + if (sum_unrollx1[i] != summands1[i] + summands2[i] || + sum_unrollx1[i] != sum_unrollx2[i] || + sum_unrollx1[i] != sum_unrollx4[i] || + sum_unrollx1[i] != sum_unrollx8[i] || + sum_unrollx1[i] != sum_unrollx16[i]) { + std::cout << "FAILED: The results are incorrect\n"; + return 1; + } + } + std::cout << "PASSED: The results are correct\n"; + return 0; +} From 0003549ca1f5f4db927a4091b3cefd83837eba94 Mon Sep 17 00:00:00 2001 From: Andrey <41368386+andrey4latyshev@users.noreply.github.com> Date: Sat, 22 Aug 2020 03:33:15 +0300 Subject: [PATCH 12/17] Add Hidden Markov Model Dwarf Sample (#87) * Initial commit * Code additions * Changes in the description of the code and README * Codestyle and README changes * Update hidden_markov_models.cpp * Included "dpc_common.hpp" in quotes * Update CMakeLists.txt * Fixing name of the source file * Fixing names * Description in the source file updated * Fixed sycl::runtime_error --- .../hidden-markov-models/CMakeLists.txt | 30 +++ .../hidden-markov-models/License.txt | 7 + .../hidden-markov-models/README.md | 89 +++++++++ .../hidden-markov-models.filters | 22 ++ .../hidden-markov-models.sln | 25 +++ .../hidden-markov-models.user | 9 + .../hidden-markov-models.vcxproj | 144 +++++++++++++ .../hidden-markov-models.vcxproj.user | 17 ++ .../hidden-markov-models/sample.json | 29 +++ .../src/hidden-markov-models.cpp | 189 ++++++++++++++++++ 10 files changed, 561 insertions(+) create mode 100644 DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/CMakeLists.txt create mode 100644 
DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/License.txt create mode 100644 DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/README.md create mode 100644 DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/hidden-markov-models.filters create mode 100644 DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/hidden-markov-models.sln create mode 100644 DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/hidden-markov-models.user create mode 100644 DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/hidden-markov-models.vcxproj create mode 100644 DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/hidden-markov-models.vcxproj.user create mode 100644 DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/sample.json create mode 100644 DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/src/hidden-markov-models.cpp diff --git a/DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/CMakeLists.txt b/DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/CMakeLists.txt new file mode 100644 index 0000000000..07ec9bb778 --- /dev/null +++ b/DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/CMakeLists.txt @@ -0,0 +1,30 @@ +# required cmake version +cmake_minimum_required(VERSION 3.5) + +project (hidden-markov-models) + +if(WIN32) + set(CMAKE_CXX_COMPILER "dpcpp-cl") +else() + set(CMAKE_CXX_COMPILER "dpcpp") +endif() + +# Set default build type to RelWithDebInfo if not specified +if (NOT CMAKE_BUILD_TYPE) + message (STATUS "Default CMAKE_BUILD_TYPE not set using Release with Debug Info") + set (CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE + STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel" + FORCE) +endif() + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -fsycl -std=c++17") + +set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lOpenCL -lsycl") + +add_executable (hidden-markov-models src/hidden-markov-models.cpp) + +add_custom_target (run + 
COMMAND hidden-markov-models + WORKING_DIRECTORY ${CMAKE_PROJECT_DIR} +) + diff --git a/DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/License.txt b/DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/License.txt new file mode 100644 index 0000000000..e63c6e13dc --- /dev/null +++ b/DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/License.txt @@ -0,0 +1,7 @@ +Copyright Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/README.md b/DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/README.md new file mode 100644 index 0000000000..8a880848c6 --- /dev/null +++ b/DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/README.md @@ -0,0 +1,89 @@ +#`DPC++ Hidden Markov Model` Sample +The HMM (Hidden Markov Model) sample presents a statistical model using a Markov process to present graphable nodes that are otherwise in an unobservable state or “hidden”. 
This technique is helpful in pattern recognition such as speech, handwriting, gesture recognition, part-of-speech tagging, partial discharges and bioinformatics. The sample offloads the complexity of the Markov process to the GPU. + +The directed edges of this graph are possible transitions between nodes or states defined with the following parameters: the number of states is N, the transition matrix A is a square matrix of size N. Each element with indexes (i,j) of this matrix determines the probability to move from the state i to the state j on any step of the Markov process (i and j can be the same if the state does not change on the taken step). + +The main assumption of the HMM is that there are visible observations that depend on the current Markov process. That dependency can be described as a conditional probability distribution (represented by emission matrix). The problem is to find out the most likely chain of the hidden Markov states using the given observations set. + +## Requirements and sample info + +| Optimized for | Description +|:--- |:--- +| OS | Linux* Ubuntu* 18.04, Windows 10 +| Hardware | Skylake with GEN9 or newer, +| Software | Intel® oneAPI DPC++ Compiler (beta) +| What you will learn | Implement Viterbi algorithm to get the most likely path that consists of the hidden states +| Time to complete | 1 minute + +## Purpose + +The sample can use GPU offload to compute sequential steps of multiple graph traversals simultaneously. + +This code sample implements the Viterbi algorithm which is a dynamic programming algorithm for finding the most likely sequence of hidden states—called the Viterbi path—that results in a sequence of observed events, especially in the context of Markov information sources and HMM. + +- Initially, the dataset for algorithm processing is generated: initial states probability distribution Pi, transition matrix A, emission matrix B and the sequence of the observations produced by the hidden Markov process.
+- First, the matrix of Viterbi values for the first step is initialized using distribution Pi and emission matrix B. The matrix of back pointers is initialized with the default value -1. +- Then, for each time step the Viterbi matrix is set to the maximal possible value using A, B and Pi. +- Finally, the state with maximum Viterbi value on the last step is set as a final state of the Viterbi path and the previous nodes of this path are determined using the corresponding rows of the back pointers matrix for each of the steps except the last one. + +Note: The implementation uses logarithms of the probabilities to process small numbers correctly and to replace multiplication operations with addition operations. + +## Key Implementation Details + +The basic DPC++ implementation explained in the code includes device selector, buffer, accessor, kernel, and command groups. + +## License +This code sample is licensed under MIT license. + +## Building the `DPC++ Hidden Markov Model` Program for CPU and GPU + +### Include Files +The include folder is located at %ONEAPI_ROOT%\dev-utilities\latest\include on your development system. + +### On a Linux* System +1. Build the program using the following `cmake` commands. + ``` + $ cd hidden-markov-models + $ mkdir build + $ cd build + $ cmake .. + $ make + ``` + +2. Run the program: + ``` + make run + ``` + +3. Clean the program using: + ``` + make clean + ``` + +### On a Windows* System Using a Command Line Interface + * Build the program using VS2017 or VS2019 + Right click on the solution file and open using either VS2017 or VS2019 IDE. + Right click on the project in Solution explorer and select Rebuild. + From top menu select Debug -> Start without Debugging.
+ + * Build the program using MSBuild + Open "x64 Native Tools Command Prompt for VS2017" or "x64 Native Tools Command Prompt for + VS2019" + Run - MSBuild hidden-markov-models.sln /t:Rebuild /p:Configuration="Release" + +### On a Windows* System Using Visual Studio* Version 2017 or Newer +Perform the following steps: +1. Locate and select the `hidden-markov-models.sln` file. +2. Select the configuration 'Debug' or 'Release'. +3. Select **Project** > **Build** menu option to build the selected configuration. +4. Select **Debug** > **Start Without Debugging** menu option to run the program. + +## Running the Sample +### Application Parameters +There are no editable parameters for this sample. + +### Example of Output +Device: Intel(R) Core(TM) i7-6820HQ CPU @ 2.70GHz Intel(R) OpenCL +The Viterbi path is: +19 18 17 16 15 14 13 12 11 10 +The sample completed successfully! \ No newline at end of file diff --git a/DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/hidden-markov-models.filters b/DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/hidden-markov-models.filters new file mode 100644 index 0000000000..5f08be7fdb --- /dev/null +++ b/DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/hidden-markov-models.filters @@ -0,0 +1,22 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;ipp;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + \ No newline at end of file diff --git a/DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/hidden-markov-models.sln b/DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/hidden-markov-models.sln new file mode 100644 index 0000000000..10106f9039 --- /dev/null +++ b/DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/hidden-markov-models.sln @@ -0,0 +1,25 @@ 
+ +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 16 +VisualStudioVersion = 16.0.30320.27 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hidden-markov-models", "hidden-markov-models.vcxproj", "{46454D0B-76F3-45EB-A186-F315A2E22DEA}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {46454D0B-76F3-45EB-A186-F315A2E22DEA}.Debug|x64.ActiveCfg = Debug|x64 + {46454D0B-76F3-45EB-A186-F315A2E22DEA}.Debug|x64.Build.0 = Debug|x64 + {46454D0B-76F3-45EB-A186-F315A2E22DEA}.Release|x64.ActiveCfg = Release|x64 + {46454D0B-76F3-45EB-A186-F315A2E22DEA}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {B1D84B81-F5D5-4459-AA6E-38B695FB908B} + EndGlobalSection +EndGlobal diff --git a/DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/hidden-markov-models.user b/DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/hidden-markov-models.user new file mode 100644 index 0000000000..fa6ed154c1 --- /dev/null +++ b/DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/hidden-markov-models.user @@ -0,0 +1,9 @@ + + + + WindowsLocalDebugger + + + WindowsLocalDebugger + + \ No newline at end of file diff --git a/DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/hidden-markov-models.vcxproj b/DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/hidden-markov-models.vcxproj new file mode 100644 index 0000000000..e894a8cca6 --- /dev/null +++ b/DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/hidden-markov-models.vcxproj @@ -0,0 +1,144 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + + + 15.0 + 
{46454d0b-76f3-45eb-a186-f315a2e22dea} + Win32Proj + hidden-markov-models + $(WindowsSDKVersion.Replace("\","")) + hidden-markov-models + + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + + + + + + + + + + + + + + + + + + + true + + + true + + + false + + + false + + + + + + + + + + Console + true + + + + + + + + + %ONEAPI_ROOT%\dev-utilities\latest\include;%(AdditionalIncludeDirectories) + Disabled + Level3 + + + Console + true + /Od;%(SpecifyDevCmplAdditionalOptions) + + + + + + + + + + + Console + true + true + true + + + + + + + + + %ONEAPI_ROOT%\dev-utilities\latest\include;%(AdditionalIncludeDirectories) + Disabled + Level3 + + + Console + true + true + true + /Od;%(SpecifyDevCmplAdditionalOptions) + + + + + + \ No newline at end of file diff --git a/DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/hidden-markov-models.vcxproj.user b/DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/hidden-markov-models.vcxproj.user new file mode 100644 index 0000000000..e631a72cce --- /dev/null +++ b/DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/hidden-markov-models.vcxproj.user @@ -0,0 +1,17 @@ + + + + cpu + WindowsLocalDebugger + CL_CONFIG_USE_NATIVE_DEBUGGER=1 +SYCL_DEVICE_TYPE=CPU +$(LocalDebuggerEnvironment) + + + cpu + WindowsLocalDebugger + CL_CONFIG_USE_NATIVE_DEBUGGER=1 +SYCL_DEVICE_TYPE=CPU +$(LocalDebuggerEnvironment) + + \ No newline at end of file diff --git a/DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/sample.json b/DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/sample.json new file mode 100644 index 0000000000..6dadf9de3f --- /dev/null +++ b/DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/sample.json @@ -0,0 +1,29 @@ +{ + "guid": 
"A63E408B-75ED-4379-A6B5-AF013C0EBA58", + "name": "hidden-markov-models", + "categories": [ "Toolkit/Intel® oneAPI Base Toolkit/oneAPI DPC++ Compiler/CPU and GPU" ], + "description": "Bitonic Sort using Intel® oneAPI DPC++ Language", + "toolchain": [ "dpcpp" ], + "targetDevice": [ "CPU", "GPU" ], + "languages": [ { "cpp": {} } ], + "os": [ "linux", "windows" ], + "builder": [ "ide", "cmake" ], + "ciTests": { + "linux": [{ + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make", + "make run" + ] + }], + "windows": [{ + "steps": [ + "MSBuild hidden-markov-models.sln /t:Rebuild /p:Configuration=\"Release\"", + "cd x64/Release", + "hidden-markov-models.exe" + ] + }] + } +} diff --git a/DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/src/hidden-markov-models.cpp b/DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/src/hidden-markov-models.cpp new file mode 100644 index 0000000000..6b2e91a8c6 --- /dev/null +++ b/DirectProgramming/DPC++/GraphTraversal/hidden-markov-models/src/hidden-markov-models.cpp @@ -0,0 +1,189 @@ +//============================================================== +// Copyright © Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +// +// Hidden Markov Models: this code sample implements the Viterbi algorithm which is a dynamic +// programming algorithm for findingthe most likely sequence of hidden states— +// called the Viterbi path—that results in a sequence of observed events, +// especially in the context of Markov information sources and HMM. +// +// The sample can use GPU offload to compute sequential steps of multiple graph traversals simultaneously. +// +// - Initially, the dataset for algorithm processing is generated : initial states probability +// distribution Pi, transition matrix A, emission matrix Band the sequence or the observations +// produced by hidden Markov process. 
+// - First, the matrix of Viterbi values for the first step is initialized using distribution Pi +// and emission matrix B. The matrix of back pointers is initialized with the default value -1. +// - Then, for each time step the Viterbi matrix is set to the maximal possible value using A, B and Pi. +// - Finally, the state with maximum Viterbi value on the last step is set as a final state of +// the Viterbi path and the previous nodes of this path are determined using the corresponding rows +// of the back pointers matrix for each of the steps except the last one. +// +// Note: The implementation uses logarithms of the probabilities to process small numbers correctly +// and to replace multiplication operations with addition operations. + +#include +#include +#include +#include +#include +#include + +// dpc_common.hpp can be found in the dev-utilities include folder. +// e.g., $ONEAPI_ROOT/dev-utilities//include/dpc_common.hpp +#include "dpc_common.hpp" + +using namespace sycl; +using namespace std; + +// Matrix size constants. +// The number of hidden states N. +constexpr int N = 20; +// The number of possible observations M. +constexpr int M = 20; +// The length of the hidden states sequence T. +constexpr int T = 20; +// The parameter for generating the sequence. +constexpr int seed = 0; +// Minimal double to initialize logarithms for Viterbi values equal to 0. +constexpr double MIN_DOUBLE = -1.0 * std::numeric_limits::max(); + +bool ViterbiCondition(double x, double y, double z, double compare); + +int main() { + try { + // Initializing and generating initial probabilities for the hidden states. + double(*pi) = new double[N]; + for (int i = 0; i < N; ++i) { + pi[i] = sycl::log10(1.0f / N); + } + buffer pi_buf(pi, N); + + //Device initialization. + queue q(default_selector{}, dpc_common::exception_handler); + cout << "Device: " << q.get_device().get_info() << " " + << q.get_device().get_platform().get_info() << "\n"; + + //Buffers initialization.
+ buffer viterbi(range<2>(N, T)); + buffer back_pointer(range<2>(N, T)); + buffer a(range<2>(N, N)); + buffer b(range<2>(N, M)); + + // Generating transition matrix A for the Markov process. + q.submit([&](handler& h) { + auto a_acc = a.get_access(h); + h.parallel_for(range<2>(N, N), [=](id<2> index) { + // The sum of the probabilities in each row of the matrix A has to be equal to 1. + double prob = 1.0f / N; + // The algorithm computes logarithms of the probability values to improve small numbers processing. + a_acc[index] = sycl::log10(prob); + }); + }); + + // Generating emission matrix B for the Markov process. + q.submit([&](handler& h) { + auto b_acc = b.get_access(h); + h.parallel_for(range<2>(N, M), [=](id<2> index) { + // The sum of the probabilities in each row of the matrix B has to be equal to 1. + double prob = ((index[0] + index[1]) % M) * 2.0f / M / (M - 1); + // The algorithm computes logarithms of the probability values to improve small numbers processing. + b_acc[index] = (prob == 0.0f) ? MIN_DOUBLE : sycl::log10(prob); + }); + }); + + // Generating the sequence of the observations produced by the hidden Markov chain. + int(*seq) = new int[T]; + for (int i = 0; i < T; ++i) { + seq[i] = (i * i + seed) % M; + } + buffer seq_buf(seq, T); + + // Initialization of the Viterbi matrix and the matrix of back pointers. + q.submit([&](handler& h) { + auto v_acc = viterbi.get_access(h); + auto b_ptr_acc = back_pointer.get_access(h); + auto b_acc = b.get_access(h); + auto pi_acc = pi_buf.get_access(h); + auto seq_acc = seq_buf.get_access(h); + h.parallel_for(range<2>(N, T), [=](id<2> index) { + int i = index[0]; + int j = index[1]; + // At starting point only the first Viterbi values are defined and these Values are substituted + // with logarithms due to the following equation: log(x*y) = log(x) + log(y). + v_acc[index] = (j != 0) ? 
MIN_DOUBLE : pi_acc[i] + b_acc[i][seq_acc[0]]; + // Default values of all the back pointers are (-1) to show that they are not determined yet. + b_ptr_acc[index] = -1; + }); + }); + delete[] pi; + + // The sequential steps of the Viterbi algorithm that define the Viterbi matrix and the matrix + // of back pointers. The product of the Viterbi values and the probabilities is substituted with the sum of + // the logarithms due to the following equation: log (x*y*z) = log(x) + log(y) + log(z). + for (int j = 0; j < T - 1; ++j) { + q.submit([&](handler& h) { + auto v_acc = viterbi.get_access(h); + auto b_ptr_acc = back_pointer.get_access(h); + auto a_acc = a.get_access (h); + auto b_acc = b.get_access (h); + auto seq_acc = seq_buf.get_access (h); + + h.parallel_for(range<2>(N, N), [=](id<2> index) { + int i = index[0], k = index[1]; + // This conditional block finds the maximum possible Viterbi value on + // the current step j for the state i. + if (ViterbiCondition(v_acc[k][j], b_acc[i][seq_acc[j + 1]], a_acc[k][i], v_acc[i][j + 1])) { + v_acc[i][j + 1] = v_acc[k][j] + a_acc[k][i] + b_acc[i][seq_acc[j + 1]]; + b_ptr_acc[i][j + 1] = k; + } + }); + }); + } + delete[] seq; + + // Getting the Viterbi path based on the matrix of back pointers + buffer vit_path(range<1> {T}); + auto v_acc = viterbi.get_access(); + auto b_ptr_acc = back_pointer.get_access(); + auto vit_path_acc = vit_path.get_access(); + double v_max = MIN_DOUBLE; + // Constructing the Viterbi path. The last state of this path is the one with + // the biggest Viterbi value (the most likely state). + for (int i = 0; i < N; ++i) { + if (v_acc[i][T - 1] > v_max) { + v_max = v_acc[i][T - 1]; + vit_path_acc[T - 1] = i; + } + } + + for (int i = T - 2; i >= 0; --i) { + // Every back pointer starting from the last one contains the index of the previous + // point in Viterbi path. 
+ vit_path_acc[i] = b_ptr_acc[vit_path_acc[i + 1]][i + 1]; + } + + cout << "The Viterbi path is: "<< std::endl; + for (int k = 0; k < T; ++k) { + cout << vit_path_acc[k] << " "; + } + cout << std::endl; + + } catch (sycl::exception const& e) { + // Exception processing + cout << "An exception is caught!\n"; + cout << "Error message:" << e.what(); + terminate(); + } + cout << "The sample completed successfully!" << std::endl; + return 0; +} + +// The method checks if all three components of the sum are not equivalent to logarithm of zero +// (that is incorrect value and is substituted with minimal possible value of double) and that +// the Viterbi value on the new step exceeds the current one. +bool ViterbiCondition(double x, double y, double z, double compare) { + return (x > MIN_DOUBLE) && (y > MIN_DOUBLE) && (z > MIN_DOUBLE) && (x + y + z > compare); +} From 771cb73080a88ea86b1c7c69274b34f5b36777a7 Mon Sep 17 00:00:00 2001 From: racheloberman-intel <52178578+racheloberman-intel@users.noreply.github.com> Date: Sun, 23 Aug 2020 10:45:45 -0400 Subject: [PATCH 13/17] adding 3 oneDAL daal4py samples (#98) * adding WIP daal4py sample for migration * more modifications and adding distributed samples for migration * more migration additions for LR and Kmeans * removing files from test runs * more edits * fixing inconsistencies and some corrections * fixing properties * modifying READMEs * modifying for ci * modifying for ci again * fixing sample.json again * more mods for ci * ci edit Co-authored-by: rachel.oberman --- Libraries/oneDAL/License.txt | 8 + .../daal4py_Distributed_Kmeans/License.txt | 8 + .../daal4py_Distributed_Kmeans/README.md | 112 ++++ .../daal4py_Distributed_Kmeans.ipynb | 254 ++++++++ .../daal4py_Distributed_Kmeans.py | 125 ++++ .../daal4py_Distributed_Kmeans_1.csv | 601 ++++++++++++++++++ .../daal4py_Distributed_Kmeans_2.csv | 601 ++++++++++++++++++ .../daal4py_Distributed_Kmeans_3.csv | 601 ++++++++++++++++++ .../daal4py_Distributed_Kmeans_4.csv | 
601 ++++++++++++++++++ .../models/store_models_in_this_folder.txt | 0 .../results/store_results_in_this_folder.txt | 0 .../daal4py_Distributed_Kmeans/sample.json | 22 + .../License.txt | 8 + .../README.md | 119 ++++ ...daal4py_Distributed_LinearRegression.ipynb | 285 +++++++++ .../daal4py_Distributed_LinearRegression.py | 122 ++++ .../linear_regression_test.csv | 128 ++++ .../linear_regression_train_1.csv | 96 +++ .../linear_regression_train_2.csv | 96 +++ .../linear_regression_train_3.csv | 96 +++ .../linear_regression_train_4.csv | 95 +++ .../models/store_models_in_this_folder.txt | 0 .../results/store_results_in_this_folder.txt | 0 .../sample.json | 22 + .../daal4py_Getting_Started/Jupyter_Run.jpg | Bin 0 -> 21870 bytes .../Jupyter_Save_Py.jpg | Bin 0 -> 34787 bytes .../daal4py_Getting_Started/License.txt | 8 + .../oneDAL/daal4py_Getting_Started/README.md | 149 +++++ .../daal4py_GettingStarted.ipynb | 252 ++++++++ .../daal4py_GettingStarted.py | 105 +++ .../models/store_models_in_this_folder.txt | 0 .../results/store_results_in_this_folder.txt | 0 .../daal4py_Getting_Started/sample.json | 22 + 33 files changed, 4536 insertions(+) create mode 100644 Libraries/oneDAL/License.txt create mode 100755 Libraries/oneDAL/daal4py_Distributed_Kmeans/License.txt create mode 100755 Libraries/oneDAL/daal4py_Distributed_Kmeans/README.md create mode 100755 Libraries/oneDAL/daal4py_Distributed_Kmeans/daal4py_Distributed_Kmeans.ipynb create mode 100755 Libraries/oneDAL/daal4py_Distributed_Kmeans/daal4py_Distributed_Kmeans.py create mode 100755 Libraries/oneDAL/daal4py_Distributed_Kmeans/data/distributed_data/daal4py_Distributed_Kmeans_1.csv create mode 100755 Libraries/oneDAL/daal4py_Distributed_Kmeans/data/distributed_data/daal4py_Distributed_Kmeans_2.csv create mode 100755 Libraries/oneDAL/daal4py_Distributed_Kmeans/data/distributed_data/daal4py_Distributed_Kmeans_3.csv create mode 100755 
Libraries/oneDAL/daal4py_Distributed_Kmeans/data/distributed_data/daal4py_Distributed_Kmeans_4.csv create mode 100755 Libraries/oneDAL/daal4py_Distributed_Kmeans/models/store_models_in_this_folder.txt create mode 100755 Libraries/oneDAL/daal4py_Distributed_Kmeans/results/store_results_in_this_folder.txt create mode 100755 Libraries/oneDAL/daal4py_Distributed_Kmeans/sample.json create mode 100755 Libraries/oneDAL/daal4py_Distributed_LinearRegression/License.txt create mode 100755 Libraries/oneDAL/daal4py_Distributed_LinearRegression/README.md create mode 100755 Libraries/oneDAL/daal4py_Distributed_LinearRegression/daal4py_Distributed_LinearRegression.ipynb create mode 100755 Libraries/oneDAL/daal4py_Distributed_LinearRegression/daal4py_Distributed_LinearRegression.py create mode 100755 Libraries/oneDAL/daal4py_Distributed_LinearRegression/data/distributed_data/linear_regression_test.csv create mode 100755 Libraries/oneDAL/daal4py_Distributed_LinearRegression/data/distributed_data/linear_regression_train_1.csv create mode 100755 Libraries/oneDAL/daal4py_Distributed_LinearRegression/data/distributed_data/linear_regression_train_2.csv create mode 100755 Libraries/oneDAL/daal4py_Distributed_LinearRegression/data/distributed_data/linear_regression_train_3.csv create mode 100755 Libraries/oneDAL/daal4py_Distributed_LinearRegression/data/distributed_data/linear_regression_train_4.csv create mode 100755 Libraries/oneDAL/daal4py_Distributed_LinearRegression/models/store_models_in_this_folder.txt create mode 100755 Libraries/oneDAL/daal4py_Distributed_LinearRegression/results/store_results_in_this_folder.txt create mode 100755 Libraries/oneDAL/daal4py_Distributed_LinearRegression/sample.json create mode 100755 Libraries/oneDAL/daal4py_Getting_Started/Jupyter_Run.jpg create mode 100755 Libraries/oneDAL/daal4py_Getting_Started/Jupyter_Save_Py.jpg create mode 100644 Libraries/oneDAL/daal4py_Getting_Started/License.txt create mode 100755 
Libraries/oneDAL/daal4py_Getting_Started/README.md create mode 100755 Libraries/oneDAL/daal4py_Getting_Started/daal4py_GettingStarted.ipynb create mode 100755 Libraries/oneDAL/daal4py_Getting_Started/daal4py_GettingStarted.py create mode 100755 Libraries/oneDAL/daal4py_Getting_Started/models/store_models_in_this_folder.txt create mode 100755 Libraries/oneDAL/daal4py_Getting_Started/results/store_results_in_this_folder.txt create mode 100755 Libraries/oneDAL/daal4py_Getting_Started/sample.json diff --git a/Libraries/oneDAL/License.txt b/Libraries/oneDAL/License.txt new file mode 100644 index 0000000000..a3ab05efce --- /dev/null +++ b/Libraries/oneDAL/License.txt @@ -0,0 +1,8 @@ +Copyright Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +© 2020 GitHub, Inc. 
\ No newline at end of file diff --git a/Libraries/oneDAL/daal4py_Distributed_Kmeans/License.txt b/Libraries/oneDAL/daal4py_Distributed_Kmeans/License.txt new file mode 100755 index 0000000000..a3ab05efce --- /dev/null +++ b/Libraries/oneDAL/daal4py_Distributed_Kmeans/License.txt @@ -0,0 +1,8 @@ +Copyright Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +© 2020 GitHub, Inc. \ No newline at end of file diff --git a/Libraries/oneDAL/daal4py_Distributed_Kmeans/README.md b/Libraries/oneDAL/daal4py_Distributed_Kmeans/README.md new file mode 100755 index 0000000000..208d4a7dfe --- /dev/null +++ b/Libraries/oneDAL/daal4py_Distributed_Kmeans/README.md @@ -0,0 +1,112 @@ +# daal4py Distributed K-Means +This sample code shows how to train and predict with a distributed k-means model using the python API package daal4py for oneAPI Data Analytics Library. 
It assumes you have a working version of MPI library installed and it demonstrates how to use software products that can be found in the [Intel oneAPI Data Analytics Library](https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onedal.html) or [Intel AI Analytics Toolkit powered by oneAPI](https://software.intel.com/content/www/us/en/develop/tools/oneapi/ai-analytics-toolkit.html). + +| Optimized for | Description +| :--- | :--- +| OS | 64-bit Linux: Ubuntu 18.04 or higher, 64-bit Windows 10, macOS 10.14 or higher +| Hardware | Intel Atom® Processors; Intel® Core™ Processor Family; Intel® Xeon® Processor Family; Intel® Xeon® Scalable Performance Processor Family +| Software | oneDAL Software Library, Python version 2.7 or >= 3.6, conda-build version >= 3, C++ compiler with C++11 support, Pickle, Pandas, NumPy +| What you will learn | distributed oneDAL K-Means programming model for Intel CPU +| Time to complete | 5 minutes + +## Purpose + +daal4py is a simplified API to Intel® DAAL that allows for fast usage of the framework suited for Data Scientists or Machine Learning users. Built to help provide an abstraction to Intel® DAAL for either direct usage or integration into one's own framework. + +In this sample you will run a distributed K-Means model with oneDAL daal4py library memory objects. You will also learn how to train a model and save the information to a file. + +## Key Implementation Details +This distributed K-means sample code is implemented for CPU using the Python language. The example assumes you have daal4py and scikit-learn installed inside a conda environment, similar to what is delivered with the installation of the Intel(R) Distribution for Python as part of the [oneAPI AI Analytics Toolkit powered by oneAPI](https://software.intel.com/en-us/oneapi/ai-kit). + +## Additional Requirements +You will need a working MPI library. 
We recommend using Intel(R) MPI, which is included in the [oneAPI HPC Toolkit](https://software.intel.com/en-us/oneapi/hpc-kit). + +## License +This code sample is licensed under the MIT license. + +## Building daal4py for CPU + +oneAPI Data Analytics Library is ready for use once you finish the Intel AI Analytics Toolkit installation and have run the post-installation script. + +You can refer to the oneAPI [main page](https://software.intel.com/en-us/oneapi) for toolkit installation, and the Toolkit [Getting Started Guide for Linux](https://software.intel.com/en-us/get-started-with-intel-oneapi-linux-get-started-with-the-intel-ai-analytics-toolkit) for post-installation steps and scripts. + +### Activate conda environment With Root Access + +Please follow the Getting Started Guide steps (above) to set up your oneAPI environment with the setvars.sh script. Then navigate in a Linux shell to your oneAPI installation path, typically `~/intel/inteloneapi`. The Intel Python environment will be active by default. However, if you activated another environment, you can return with the following command: + +#### On a Linux* System +``` +source activate base +``` + +### Activate conda environment Without Root Access (Optional) + +By default, the Intel AI Analytics toolkit is installed in the inteloneapi folder, which requires root privileges to manage it.
If you would like to bypass using root access to manage your conda environment, then you can clone your desired conda environment using the following command: + +#### On a Linux* System +``` +conda create --name user_base --clone base +``` + +Then activate your conda environment with the following command: + +``` +source activate user_base +``` + +### Install Jupyter Notebook +``` +conda install jupyter nb_conda_kernels +``` + + +#### View in Jupyter Notebook + +_Note: This distributed execution cannot be launched from the Jupyter Notebook version, but you can still view inside the notebook to follow the included write-up and description._ + +Launch Jupyter Notebook in the directory housing the code example + +``` +jupyter notebook +``` + +### Running the Sample as a Python File + +When using daal4py for distributed memory systems, the command needed to execute the program should be executed in a bash shell. To execute this example, run the following command, where the number **4** is chosen as an example and means that it will run on **4 processes**: + +Run the Program + +`mpirun -n 4 python ./daal4py_Distributed_Kmeans.py` + +The output of the script will be saved in the included models and results directories. + +_Note: This code sample focuses on how to use daal4py to do distributed ML computations on chunks of data. The `mpirun` command above will only run on a single local node. In order to launch on a cluster, you will need to create a host file on the master node among other steps.
The **TensorFlow_Multinode_Training_with_Horovod** code sample explains this process well._ + +##### Expected Printed Output (with similar numbers, printed 4 times): +``` + + +Here our centroids: + + + [[ 5.46000000e+02 -3.26170648e+00 -6.15922494e+00] + [ 1.80000000e+01 -1.00432059e+01 -8.38198798e+00] + [ 4.10000000e+02 3.78330964e-01 8.29073839e+00]] + +Here is our centroids loaded from file: + + [[ 5.46000000e+02 -3.26170648e+00 -6.15922494e+00] + [ 1.80000000e+01 -1.00432059e+01 -8.38198798e+00] + [ 4.10000000e+02 3.78330964e-01 8.29073839e+00]] +Here is our cluster assignments for first 5 datapoints: + + [[1] + [1] + [1] + [1] + [1]] +[CODE_SAMPLE_COMPLETED_SUCCESFULLY] + +``` + + diff --git a/Libraries/oneDAL/daal4py_Distributed_Kmeans/daal4py_Distributed_Kmeans.ipynb b/Libraries/oneDAL/daal4py_Distributed_Kmeans/daal4py_Distributed_Kmeans.ipynb new file mode 100755 index 0000000000..8d245508dc --- /dev/null +++ b/Libraries/oneDAL/daal4py_Distributed_Kmeans/daal4py_Distributed_Kmeans.ipynb @@ -0,0 +1,254 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# =============================================================\n", + "# Copyright © 2020 Intel Corporation\n", + "# \n", + "# SPDX-License-Identifier: MIT\n", + "# =============================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Daal4py K-Means Clustering Example for Distributed Memory Systems [SPMD mode]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## IMPORTANT NOTICE" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When using daal4py for distributed memory systems, the command needed to execute the program should be **executed \n", + "in a bash shell**. 
In order to run this example, please download it as a .py file then run the following command (**the number 4 means that it will run on 4 processes**):" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "mpirun -n 4 python ./daal4py_Distributed_Kmeans.py" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Importing and Organizing Data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this example we will be using K-Means clustering to **initialize centroids** and then **use them to cluster the synthetic dataset.**\n", + "\n", + "Let's start by **importing** all necessary data and packages." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "##### daal4py K-Means Clustering example for Distributed Memory Systems [SPMD Mode] #####\n", + "import daal4py as d4p\n", + "import pickle\n", + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's **load** in the dataset and **organize** it as necessary to work with our model. For distributed, every file has a unique ID.\n", + "\n", + "We will also **initialize the distribution engine**." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "d4p.daalinit() #initializes the distribution engine\n", + "\n", + "# organizing variables used in the model for prediction\n", + "# each process gets its own data\n", + "infile = \"./data/distributed_data/daal4py_Distributed_Kmeans_\" + str(d4p.my_procid()+1) + \".csv\"\n", + "\n", + "# read data\n", + "X = pd.read_csv(infile)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Computing and Saving Initial Centroids" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Time to **initialize our centroids!**" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# computing inital centroids\n", + "init_result = d4p.kmeans_init(nClusters = 3, method = \"plusPlusDense\").compute(X)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To **get initial centroid information and save it** to a file:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Hers is our centroids:\n", + "\n", + "\n", + " [[ 5.46000000e+02 -4.95417384e-01 8.83354904e+00]\n", + " [ 1.80000000e+01 -4.12886224e+00 -7.35426095e+00]\n", + " [ 4.11000000e+02 -3.27940151e+00 -6.22280477e+00]] \n", + "\n" + ] + } + ], + "source": [ + "# retrieving and printing inital centroids\n", + "centroids = init_result.centroids\n", + "print(\"Here's our centroids:\\n\\n\\n\", centroids, \"\\n\")\n", + "\n", + "centroids_filename = './models/kmeans_clustering_initcentroids_'+ str(d4p.my_procid()+1) + '.csv'\n", + "\n", + "# saving centroids to a file\n", + "pickle.dump(centroids, open(centroids_filename, \"wb\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's **load up the centroids** and look at them." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Here is our centroids loaded from file:\n", + "\n", + " [[ 5.46000000e+02 -4.95417384e-01 8.83354904e+00]\n", + " [ 1.80000000e+01 -4.12886224e+00 -7.35426095e+00]\n", + " [ 4.11000000e+02 -3.27940151e+00 -6.22280477e+00]]\n" + ] + } + ], + "source": [ + "# loading the initial centroids from a file\n", + "loaded_centroids = pickle.load(open(centroids_filename, \"rb\"))\n", + "print(\"Here is our centroids loaded from file:\\n\\n\",loaded_centroids)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Assign The Data to Clusters and Save The Results" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's **assign the data** to clusters." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# compute the clusters/centroids\n", + "kmeans_result = d4p.kmeans(nClusters = 3, maxIterations = 5, assignFlag = True).compute(X, init_result.centroids)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To **get Kmeans result objects** (assignments, centroids, goalFunction [deprecated], nIterations, and objectiveFunction):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# retrieving and printing cluster assignments\n", + "assignments = kmeans_result.assignments\n", + "print(\"Here is our cluster assignments for first 5 datapoints: \\n\\n\", assignments[:5])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 
"3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Libraries/oneDAL/daal4py_Distributed_Kmeans/daal4py_Distributed_Kmeans.py b/Libraries/oneDAL/daal4py_Distributed_Kmeans/daal4py_Distributed_Kmeans.py new file mode 100755 index 0000000000..611abc988c --- /dev/null +++ b/Libraries/oneDAL/daal4py_Distributed_Kmeans/daal4py_Distributed_Kmeans.py @@ -0,0 +1,125 @@ +#!/usr/bin/env python +# coding: utf-8 + +# In[1]: + + +''' +============================================================= +Copyright © 2020 Intel Corporation + +SPDX-License-Identifier: MIT +============================================================= +''' + +# # Daal4py K-Means Clustering Example for Distributed Memory Systems [SPMD mode] + +# ## IMPORTANT NOTICE + +# When using daal4py for distributed memory systems, the command needed to execute the program should be **executed +# in a bash shell**. In order to run this example, please download it as a .py file then run the following command (**the number 4 means that it will run on 4 processes**): + +# mpirun -n 4 python ./daal4py_Distributed_Kmeans.py + +# ## Importing and Organizing Data + +# In this example we will be using K-Means clustering to **initialize centroids** and then **use them to cluster the synthetic dataset.** +# +# Let's start by **importing** all necessary data and packages. + +# In[2]: + + +##### daal4py K-Means Clustering example for Distributed Memory Systems [SPMD Mode] ##### +import daal4py as d4p +import pickle +import pandas as pd +import numpy as np + + +# Now let's **load** in the dataset and **organize** it as necessary to work with our model. For distributed, every file has a unique ID. +# +# We will also **initialize the distribution engine**. 
+ +# In[3]: + + +d4p.daalinit() #initializes the distribution engine + +# organizing variables used in the model for prediction +# each process gets its own data +infile = "./data/distributed_data/daal4py_Distributed_Kmeans_" + str(d4p.my_procid()+1) + ".csv" + +# read data +X = pd.read_csv(infile) + + +# ## Computing and Saving Initial Centroids + +# Time to **initialize our centroids!** + +# In[4]: + + +# computing initial centroids +init_result = d4p.kmeans_init(nClusters = 3, method = "plusPlusDense").compute(X) + + +# To **get initial centroid information and save it** to a file: + +# In[5]: + + +# retrieving and printing initial centroids +centroids = init_result.centroids +print("Here our centroids:\n\n\n", centroids, "\n") + +centroids_filename = './models/kmeans_clustering_initcentroids_'+ str(d4p.my_procid()+1) + '.csv' + +# saving centroids to a file +pickle.dump(centroids, open(centroids_filename, "wb")) + + +# Now let's **load up the centroids** and look at them. + +# In[6]: + + +# loading the initial centroids from a file +loaded_centroids = pickle.load(open(centroids_filename, "rb")) +print("Here is our centroids loaded from file:\n\n",loaded_centroids) + + +# # Assign The Data to Clusters and Save The Results + +# Let's **assign the data** to clusters. + +# In[7]: + + +# compute the clusters/centroids +kmeans_result = d4p.kmeans(nClusters = 3, maxIterations = 5, assignFlag = True).compute(X, init_result.centroids) + + +# To **get Kmeans result objects** (assignments, centroids, goalFunction [deprecated], nIterations, and objectiveFunction): + +# In[8]: + + +# retrieving and printing cluster assignments +assignments = kmeans_result.assignments +print("Here is our cluster assignments for first 5 datapoints: \n\n", assignments[:5]) + + +# Now let's **export the cluster assignments** to a **CSV file**.
We will also **stop the distribution engine.** + +# In[9]: + + +# now export the results to a CSV file +results_filename = "./results/daal4py_Distributed_Kmeans_results_" + str(d4p.my_procid()+1) + ".csv" +np.savetxt(results_filename, assignments, delimiter=",") + +d4p.daalfini() # stops the distribution engine +print('[CODE_SAMPLE_COMPLETED_SUCCESFULLY]') + diff --git a/Libraries/oneDAL/daal4py_Distributed_Kmeans/data/distributed_data/daal4py_Distributed_Kmeans_1.csv b/Libraries/oneDAL/daal4py_Distributed_Kmeans/data/distributed_data/daal4py_Distributed_Kmeans_1.csv new file mode 100755 index 0000000000..7f45cc383f --- /dev/null +++ b/Libraries/oneDAL/daal4py_Distributed_Kmeans/data/distributed_data/daal4py_Distributed_Kmeans_1.csv @@ -0,0 +1,601 @@ +,0,1 +0,1.8723965807238774,9.423076509081708 +1,2.3039829051048324,9.018580186635331 +2,1.026073451114729,8.077031073995432 +3,-8.541605432789838,-8.00341797317489 +4,-2.369401173189912,-6.975984734090902 +5,2.5806063684707987,10.217573085425613 +6,-9.284790097193767,-7.4097633737311375 +7,-3.522989397130624,-6.613010192479875 +8,1.027142105078586,8.80052166519931 +9,-3.2697078620655873,-5.893962453251839 +10,-8.885013378341931,-7.333503638663275 +11,-2.0396671351470204,-7.044169599415981 +12,-4.597620364286053,-7.453330250170903 +13,-6.456202788660033,-7.212171290757356 +14,1.4445918585874757,7.142250586300567 +15,2.1173329239322483,7.70433584845831 +16,-5.4802518839548515,-6.079679059100007 +17,-2.244797527183411,-7.0731238440633675 +18,-4.128862239659543,-7.354260953773871 +19,1.6884289800429058,9.819116848158599 +20,-8.610019349301364,-7.939921364213658 +21,-9.056524840424752,-8.6211365869879 +22,3.3059953897609953,7.8815868414717425 +23,-4.080336467441554,-6.722207807125867 +24,-2.6893506939487377,-5.168503633974499 +25,-3.3282809849682,-7.379647324555367 +26,1.5129512034685555,8.577779572751652 +27,-10.115832037074668,-6.748848596820765 +28,3.6388808782426136,9.185890863280441 
+29,-3.00028100453964,-5.969928023226169 +30,-9.93319036402656,-5.786793165788258 +31,1.1090324024414029,9.291161672490887 +32,-2.8613517909224715,-5.354351487416557 +33,1.0291251821957932,7.976676963358322 +34,2.9700930258076887,9.467411686683057 +35,-8.168821654492318,-5.790117115599025 +36,-4.145244396361411,-6.28233426294262 +37,-4.707198834288474,-5.984294898402102 +38,-10.54314968159905,-6.521137980753572 +39,-2.8107961027624766,-7.600863870998378 +40,-8.343541203130618,-7.5235346431712875 +41,-3.561265069901654,-6.47598120139939 +42,1.602214674861818,9.914325233595067 +43,3.350406430966756,11.289498620860265 +44,-6.890126059113473,-6.852073175637249 +45,-8.926423471428778,-7.727377511846929 +46,0.8164245936495235,8.916619307728906 +47,1.97328080691373,8.403853043582245 +48,-3.348392512406978,-7.97108628676181 +49,-9.020767109684183,-6.546889799655273 +50,-8.833926198022965,-7.428422280319241 +51,2.5346141339862474,9.65332832411909 +52,-4.434886357325489,-7.066557876144199 +53,2.8120211842088287,9.696878530991425 +54,-2.0128979493397274,-7.269013395654566 +55,-5.168355005107381,-4.699037335501423 +56,-2.7036500461294777,-5.5151627803873655 +57,1.9776778941634992,8.919828769286113 +58,-10.340388439013493,-5.786781689624086 +59,-5.101517295194648,-4.830476798613082 +60,-4.800994153532466,-3.3894922643890757 +61,-8.470818425322038,-6.416551078654722 +62,1.2163842111707046,8.240738678586231 +63,-7.573810331519125,-5.989336620836275 +64,-8.229867526485254,-7.193233970728691 +65,-4.901514626307628,-7.585912020677485 +66,-9.231559142391077,-9.19746931272156 +67,-1.9230752736805574,-5.506535847318018 +68,-9.437917519275558,-7.826778197496669 +69,-7.659160480421658,-6.528634381080725 +70,-3.6438217631687175,-6.8571324102789 +71,-4.9559928341827435,-5.264716327780803 +72,-3.6526387444195936,-5.61501979239173 +73,-10.73660232270179,-9.618413219025292 +74,-7.850988109207649,-7.689798105008403 +75,0.8701242424569892,10.293793647904694 
+76,-6.335238230849191,-8.227788204093063 +77,-2.052319669530889,-7.376260216839449 +78,-7.065371800261501,-7.653736489908707 +79,-4.869972297466505,-5.80687913084216 +80,0.9551076290473562,8.600680899343779 +81,1.61798032639464,9.514230838946418 +82,1.290303039005867,8.874762993091842 +83,1.7169297196710165,7.722133429760188 +84,-8.97140030945659,-9.953721246636322 +85,2.9441411029062454,10.073257625162373 +86,-8.562882804324602,-7.806641777045539 +87,2.415995112048434,9.36434699718389 +88,-4.56418486735835,-7.562291899884148 +89,-3.3726386527952292,-5.439598282833402 +90,-6.831168562244628,-8.563364402768862 +91,1.394409575533413,9.279097027932696 +92,-8.488381038275751,-6.724898042083206 +93,-9.504913246191423,-8.300327567851 +94,-6.948849104914956,-6.521919194615732 +95,-9.352569270247198,-7.774503943661293 +96,-8.948787065859575,-9.07106742804278 +97,-8.888113050595429,-6.3873028327127805 +98,2.4763833460431126,9.240278530822199 +99,-9.313632569339555,-5.737661563703921 +100,-7.7346131143536905,-7.821933935118758 +101,2.2174107493455897,9.652396968167826 +102,-2.689779361042282,-7.386373883622923 +103,-8.46279863033734,-7.90876135235025 +104,-10.291976305929914,-8.593599798344943 +105,-10.100955971386144,-5.991161008843616 +106,0.43397101161108087,10.232991025840189 +107,0.46895105516566526,8.378385146013642 +108,-7.373252885214495,-6.825063639557488 +109,-0.130378027403375,6.9190464066546715 +110,-4.020200293578918,-6.430029396302369 +111,-0.08921309008431444,8.191850705666758 +112,-3.8653134420514803,-5.4694094235747 +113,1.628227842220048,9.309015751656906 +114,-9.594737733986559,-9.003997363347098 +115,-4.769281807154009,-8.417076408378273 +116,1.6647750920863866,9.955257929870456 +117,-4.419150978955475,-5.539302132893461 +118,-8.566286257143563,-8.111681850177206 +119,-4.960922739518262,-6.5069242911263565 +120,-8.845626011494295,-5.780985990719593 +121,-3.803368204368129,-7.098085700545766 +122,-0.10628537209794042,8.653072759332863 
+123,-4.389175840066328,-5.728288748906668 +124,-8.843339430157748,-6.307656890829538 +125,-3.7763663979968274,-6.772988814075323 +126,-1.8945877782961802,-5.67635112725773 +127,0.03130878326482467,9.65962901724951 +128,-8.934480603545063,-7.667867555390796 +129,-3.1632449424398255,-5.781542666823375 +130,-8.698422364088024,-6.923785911172841 +131,-3.438697704586029,-8.048713246022794 +132,-3.635278143631961,-7.391709231534364 +133,1.4878816870410658,10.434460320285321 +134,0.33919524795335887,8.360303461972801 +135,-2.912412623932284,-7.8874631834968305 +136,1.812496486099853,9.607077515538352 +137,0.6232376545308436,7.792875411524995 +138,-2.557308368694793,-5.6452808794139955 +139,-4.034444015689002,-9.090709842422477 +140,-5.5278062005678015,-5.861445423122923 +141,-2.80822541498089,-7.153794823046844 +142,-2.580346473036328,-6.956940028675239 +143,-9.031388716432902,-8.725501431311354 +144,-8.711556977947783,-7.0166882679856855 +145,-3.6924472044053593,-5.979849731209238 +146,-2.400875296282025,-7.3654860474319435 +147,-9.502494612938754,-6.882389220685811 +148,-8.383355689087542,-8.517502105879297 +149,1.6891420003666533,8.24934183280459 +150,2.740338881826161,9.545020463701205 +151,-4.247441724425719,-6.211270016948834 +152,0.9847351521480718,6.9708198696061485 +153,2.4114607797001315,8.73350761238257 +154,1.2320784072906812,8.97897642364675 +155,-2.1393920629714054,-5.057791738024882 +156,2.3671832737262184,8.46375484267337 +157,-9.74988460698268,-7.482378149268028 +158,2.3000007665895907,10.721851619443242 +159,-5.50780281880151,-6.428946324501107 +160,-2.897479913080972,-5.172777184331416 +161,-0.657999396336646,9.666995011660223 +162,-3.378063010317772,-6.323195949284838 +163,3.715349030367662,8.079777142112702 +164,-2.5346022348849875,-6.554414611933407 +165,-3.7261095610540207,-6.525586577496511 +166,-4.400979092409175,-5.482685404964332 +167,-5.183156574865519,-4.94726433336997 +168,1.7362635501608856,8.196687567472695 
+169,2.3972014974539775,10.113895861761995 +170,-9.632491326549443,-8.017260707708173 +171,-0.5594012154597015,7.604364100618122 +172,2.3416747811127054,7.006744152775529 +173,0.8769675389837549,8.631891224545527 +174,-2.9879989464742245,-6.616673149083951 +175,-11.48989377544951,-6.116923703694228 +176,-7.043559206169571,-7.389109142947291 +177,3.949051276063594,7.963460398342924 +178,-8.922561846082933,-6.752903793551254 +179,-8.430183397712431,-8.504082904889342 +180,0.6245943758437478,8.199459766621962 +181,-5.780096953221933,-6.675029748733625 +182,-8.637738748456233,-6.569969041154689 +183,-8.951838895375104,-8.051232000315483 +184,-9.626070914197353,-6.264628060914582 +185,-9.145990202367672,-7.406681887093229 +186,-3.434801052362838,-6.187579768673667 +187,1.665414669161203,9.31199325225241 +188,1.9857290962543637,8.676310707858823 +189,-9.14129259344747,-7.564598304723135 +190,1.7803979810077042,9.265052880657668 +191,0.4667425063754851,7.805511391561872 +192,1.0665242623902753,8.781668124487213 +193,0.9859804989464445,9.33935533880398 +194,-4.224897569796012,-5.301022974691385 +195,2.180032721568203,8.556214069490203 +196,-4.363950110086683,-6.887267518229789 +197,-5.345620360032975,-5.345902231515946 +198,-9.873822668160358,-8.014234349652336 +199,1.8726679327582603,10.177105298248879 +200,1.5628991025595353,9.56172143949572 +201,1.0929361214337034,7.945435252037524 +202,1.817975788982048,9.973506416969428 +203,2.3421387813360517,5.507999588746552 +204,-8.545495510460189,-7.839907819878756 +205,-1.834346050272757,-7.334320951620844 +206,-8.7811793481458,-8.885832599647916 +207,3.3101922716031003,9.962395815306802 +208,0.7677167907391013,7.754761245418569 +209,-2.453242944764968,-8.345166172672885 +210,-2.1674273959241184,-5.6657351714780875 +211,3.129009590443727,7.285738404952774 +212,-4.390632916615479,-6.8399659983240095 +213,-8.70362515574467,-8.113278417832024 +214,-8.185032590903454,-8.245519546204777 +215,-8.554265650421904,-6.354471258905091 
+216,-3.6238109363554485,-7.990631339289265 +217,-8.422208256528265,-6.237949347003386 +218,-9.062556919549863,-9.185027910001303 +219,-11.498894787253768,-6.882960931814834 +220,-10.275788114535857,-7.944099185829921 +221,1.7095239756030542,9.596021419978454 +222,4.030894272872818,8.587812385034185 +223,-9.317187475710424,-9.439334686580088 +224,-3.6530939607148105,-6.970908550723431 +225,1.651465790229752,9.927927019815462 +226,2.0700898465438335,9.64101532767761 +227,-4.146806648249522,-6.583169244052819 +228,-9.41873849787954,-6.296965466311001 +229,2.75105345921606,8.63533599691335 +230,-7.117352937226269,-8.354981611155464 +231,-4.113647867447832,-7.039554319326716 +232,-3.9940376887494233,-6.790554267856149 +233,-4.876577053923553,-5.290597175632066 +234,0.9835666371302964,8.301891192543446 +235,-3.4038342312836707,-9.032867323830587 +236,-4.141554979311696,-7.967736281926432 +237,-8.617460702681221,-6.628626633657286 +238,1.403168461357089,8.98081385091177 +239,1.2093458294649204,9.658775773256561 +240,-4.055100012797214,-5.294792682973709 +241,-2.783087065823997,-7.77376498017402 +242,-8.628549162973693,-4.9787131649897125 +243,0.7151568906878767,9.180827710651107 +244,-8.522317902226503,-7.285706505930886 +245,-8.487428757558858,-7.024641274080103 +246,-4.1178652200658314,-7.396756418455362 +247,1.7447684534115906,9.031670566483196 +248,-9.310081107059657,-7.234580417715351 +249,1.9332101560563706,8.007104563168681 +250,1.7310352269945617,7.384783889660245 +251,1.7947059994721024,9.882532713018065 +252,-7.289414765548567,-10.012923571907196 +253,-6.990409331327357,-7.133331998874008 +254,1.5316843670219304,6.587370791652179 +255,-8.810652853340734,-7.740094173343104 +256,-5.308610848170018,-7.995193978631127 +257,-4.50146448126074,-5.685088920689693 +258,-8.207486482202814,-8.232203072496342 +259,-3.2395098150180077,-7.889312916269965 +260,0.19520339576906864,8.684534892712938 +261,1.0810456300141347,9.895907121450538 
+262,-10.241820115759646,-8.4574144650483 +263,1.4843270015157928,10.48844276076728 +264,-2.0528995769229246,-5.109246700530515 +265,-2.80602875067267,-7.116607906192952 +266,3.5303986412736306,9.16153512008025 +267,1.1871221323757888,9.649626296305442 +268,1.1662609737079332,7.753907332737844 +269,0.7520200489476977,7.953333506449035 +270,1.6157852796571222,8.57244834025245 +271,-3.8069406639093133,-7.425164023262084 +272,-2.868721951203068,-7.394562784992771 +273,-3.939070909536578,-6.813736857807444 +274,1.64420639444631,8.873192455820949 +275,-8.274472346059143,-6.637532536181894 +276,0.5714796608788868,8.018913195681947 +277,-3.827618940828862,-8.38233220367727 +278,-0.6961214572120009,7.678277612948944 +279,-3.543635462461846,-4.725334508934144 +280,-3.1822698131257106,-7.192578829145819 +281,-3.7129501936056197,-8.650556086913245 +282,-2.9029511162275186,-7.3689026849263755 +283,0.3851146360298001,9.836714477121308 +284,-6.8909485133434,-7.558780860440229 +285,-7.980234843184486,-6.322261578874826 +286,-8.931259986955775,-8.450579870297657 +287,1.6737535480317962,8.65024853655783 +288,0.826709738075403,8.09805941812536 +289,-9.302631974352856,-9.0454586149317 +290,-4.908765883636813,-7.065364826691322 +291,0.9272482963727093,9.151744299047177 +292,-4.704891488369021,-5.223417114033953 +293,-8.66128930295533,-9.421322598638117 +294,-2.3317547848791245,-7.277325144699796 +295,-10.113162936059052,-5.52573503696392 +296,-3.1446284099964332,-4.677418598405924 +297,-2.743809044607817,-8.05953682348741 +298,-10.220236959264621,-7.02780678286387 +299,-4.915008120364791,-5.091923121986243 +300,3.0639853629328946,9.549426153038652 +301,-7.208099932433294,-5.75728435160728 +302,-3.5252647247033626,-6.052245305030447 +303,-7.686145068033298,-6.062628295638922 +304,-3.1726651779710164,-5.228661146280547 +305,1.2982626763148466,9.87163188584001 +306,-8.471380564558972,-7.856583039077515 +307,-2.4194243830958095,-5.25583697706461 +308,-6.681092123226286,-7.612664207772949 
+309,-9.125540384037064,-8.549585667559677 +310,3.0320588919262574,7.831240489969865 +311,-8.221064029858395,-7.236701215350069 +312,1.2412950888061665,10.017138742473664 +313,-9.535046634147104,-8.769890960265467 +314,-3.408398280085348,-5.96382974621914 +315,0.5413432601850015,9.097822363391515 +316,2.763691787338296,9.063179309030534 +317,0.9505229335340074,8.862938140668978 +318,-2.5189564601842322,-7.826025922618833 +319,1.4504727139845266,7.364710123749552 +320,-3.7735925506268915,-7.65494713405503 +321,1.4250384640172251,8.762099855880837 +322,3.160693347229116,7.6738226049792235 +323,1.7717731821304106,8.650182596558299 +324,-10.071016493767532,-7.337461506157067 +325,-8.55532715752027,-7.666000790269656 +326,-3.2204015351444726,-6.391362446736852 +327,-10.264066074526038,-8.27713867477806 +328,-8.46885778216139,-8.101892160300661 +329,-9.404541129281325,-8.21904518562134 +330,-4.072917331149152,-7.338156678245081 +331,-3.696902662518048,-8.11808079390431 +332,2.141842893029363,9.779204635114384 +333,1.4972505342845004,8.273713710336331 +334,-8.846787937810072,-9.02730383684036 +335,1.7660568824918503,8.1418782533338 +336,-9.651176645541238,-7.4042749213795895 +337,-3.606196666260453,-5.265726859015916 +338,2.828827773136019,9.487546474030562 +339,-4.400891371149478,-5.5457711253957385 +340,-2.307384358733934,-7.348399475401877 +341,1.1715850294245174,10.345182298603854 +342,1.4223996598626933,8.37771842373096 +343,1.451342637658223,9.826895261036695 +344,-10.132928887943466,-7.747290072161744 +345,-10.077173192464576,-7.7009641307717445 +346,-2.7403945790533495,-6.691107926961068 +347,-3.685188266257063,-7.118394980068402 +348,1.4297614168899035,8.991190711234426 +349,0.9699181835479108,10.008555079871995 +350,1.0370607398720144,9.630783542637518 +351,-2.752474877022182,-6.595211929514176 +352,1.7283514396945507,8.46295371511547 +353,2.347624499061898,9.269667488789345 +354,-3.1991203481039188,-7.586030234867592 +355,-10.702953880465959,-6.064055420015948 
+356,2.139487696622963,8.192097506911837 +357,-9.802700270912315,-7.239184432887089 +358,-8.192232955020291,-8.17058960139829 +359,1.8855654851018984,7.5057242164919415 +360,-8.813873220046947,-7.472411012653497 +361,-4.129155979165331,-7.1065424150176195 +362,1.1012733349681794,10.472684478931251 +363,-9.717029748435161,-7.727844370470556 +364,-2.344091878136072,-5.280345353541628 +365,-8.782481937816403,-7.366299960389979 +366,3.8553344743880937,7.602650026832679 +367,-3.775402698272785,-5.668250996662408 +368,1.658484193833966,8.540554769258135 +369,-4.3526321806296,-7.192497620325609 +370,2.893876132796926,8.62656237208975 +371,-4.174631508186347,-7.826334613441495 +372,-10.550816260775768,-8.439343051683956 +373,1.8005486117342335,9.280398775888468 +374,1.3232385233144566,8.949975443235546 +375,1.5881939128241624,8.267200170691533 +376,-8.41536017266014,-7.728288579010116 +377,-3.4947561605515896,-7.483636021577496 +378,-0.15892687409951511,9.234260631001696 +379,-4.6823048714848925,-5.524518995949238 +380,-7.840175300256336,-7.503496406404144 +381,0.8365315425003037,8.22878104219965 +382,-8.744928882159561,-6.654946107242386 +383,-8.195667876629038,-8.196815597437658 +384,0.950055450605099,8.740084428459868 +385,1.4475900430160649,8.075076799105 +386,0.41654693130221077,8.635406961568314 +387,-9.27810291136573,-8.33645820603062 +388,-4.1615442027987894,-5.1658904153231955 +389,2.3858159594389976,8.07217645185868 +390,1.211024985945369,9.284066645806027 +391,-8.754916718713156,-8.370432633440902 +392,-9.826823645684605,-8.147552258744406 +393,0.3162752659865671,8.970021210588147 +394,1.5396120233089956,9.760003575747081 +395,0.07905807755856809,7.420249614630872 +396,-3.143881204768702,-6.214537881949882 +397,-8.170847736346687,-8.631169866939592 +398,-6.045080792096531,-6.927595217572093 +399,-7.479253588599581,-8.566301640143283 +400,-9.649688603020303,-8.274248401618797 +401,-8.403343361841443,-7.5147391259480845 +402,2.5358250807959606,8.309735399163488 
+403,-3.4525140796501756,-6.637523823522272 +404,-2.572164495885186,-5.180095563618318 +405,1.9915612782471583,9.018087987152844 +406,-9.667516437002341,-5.889585605577505 +407,-8.933263844689996,-7.187094251719924 +408,-4.179465573588384,-6.155806824738958 +409,-4.98886553816402,-7.686720696788742 +410,1.544001113411751,10.154643826195707 +411,-3.279401506436028,-6.222804774858151 +412,-11.079237642121381,-8.447160410085862 +413,0.40898827425021644,9.523909932918087 +414,-7.0896159034552095,-7.375663059325421 +415,2.466951700395066,9.048522599661807 +416,1.3549265846304421,9.990276162396738 +417,-10.179634209317538,-6.551993593527024 +418,-2.873417246165876,-3.8708787407862117 +419,-3.5838799755018966,-7.238830183356529 +420,-8.66797609357964,-8.756997215451886 +421,0.7780488587914264,11.206822640440464 +422,-9.645810041253542,-6.512757622590329 +423,-3.618287088016478,-4.330567121113075 +424,-9.585955907487172,-7.564147078708663 +425,-8.740080835667838,-7.093178350044198 +426,-4.340047884211928,-6.289283842398694 +427,-3.539329548331343,-5.617207365017552 +428,0.8145812261578539,9.611219247626572 +429,1.5550445193814766,8.643999636842388 +430,-3.1660419049098456,-5.058791693995781 +431,-8.506868108800434,-8.230186674963203 +432,0.06495063517682231,8.597925521664141 +433,-5.1787863980691915,-6.790391702635773 +434,-2.3533743909411706,-7.154986513522051 +435,2.2132024398457215,10.20995443280583 +436,2.4179291103423024,8.770756439334107 +437,-8.050361016111665,-7.970934054841077 +438,-4.314910857410013,-6.592581802720625 +439,-8.468991499952105,-8.540475092737868 +440,-3.6745287386681724,-7.801683486768152 +441,2.6769491507548704,8.929830316657585 +442,-9.253226611110007,-9.648182830809313 +443,-9.229332311167068,-7.670127308808654 +444,2.005645904691318,7.548333527232663 +445,-9.811019277363656,-7.5469107802949456 +446,0.7705117663342075,9.434756976583303 +447,-3.193475478937037,-6.880808783112333 +448,-4.65246333395054,-6.350190774248163 
+449,-3.924403785124271,-5.343525556631257 +450,0.5772856705077776,7.009740329844672 +451,-9.603555842930872,-7.081788382561963 +452,-4.140793777836625,-5.798587400881218 +453,-2.66124447687007,-8.585421104932015 +454,-4.4288076969023304,-6.34015358650103 +455,3.2987750889119223,8.891476862390457 +456,-10.311638892527279,-8.5792015261938 +457,-7.839470473020748,-7.80022621289589 +458,-8.824423145669883,-7.635995838947142 +459,-10.545470826021129,-7.975057715359307 +460,-4.090402313141579,-6.387145665940683 +461,-3.3930352700168953,-5.845673158148595 +462,-9.707906851582454,-8.178708047475155 +463,-8.453421549788144,-7.817722041984739 +464,3.7672808922012364,8.984680790128612 +465,-8.62245546740891,-8.99718474278544 +466,-4.105378275540775,-7.127140428716648 +467,2.289373378242087,9.032110991531445 +468,2.061570865234709,7.918839971777789 +469,-8.762917704970183,-7.593919503708617 +470,0.7466239496955772,7.202831373388115 +471,-4.166259049478555,-6.3568239760200855 +472,-10.254542843922076,-9.055265472063146 +473,-4.750741610871712,-6.30369747163497 +474,-8.741288195777253,-7.433891031459792 +475,2.744327795431177,9.639480684161692 +476,-2.7708325992894016,-6.0843946172169225 +477,-4.76250699365201,-5.666442283275416 +478,3.4120302342320437,9.791303820459628 +479,-8.456835872500541,-8.387123016486056 +480,-9.384927307809502,-9.039705459432938 +481,2.0001815688605764,7.458557021764323 +482,-8.973037900370395,-6.782326594034474 +483,1.1996468541091247,9.297809246320538 +484,0.8694674171783726,7.750627157483518 +485,-4.523949268699284,-6.068240796348103 +486,-11.114355667874628,-6.009628431691224 +487,0.22680744202711955,7.947038915303593 +488,-7.17883729541921,-10.007204254185938 +489,2.731860327313859,8.410358774077515 +490,2.369237333738112,8.118311518684367 +491,0.39718837250979067,9.689950662878346 +492,-10.594062983984935,-8.447903599968848 +493,-9.126887556946118,-7.586352247878466 +494,2.80772621833424,10.455228464896777 
+495,-7.0700967084862425,-8.57191213659888 +496,0.2549569332878978,8.6630049015454 +497,0.5552308201889604,7.335795109030309 +498,2.358611871194677,9.42853369413311 +499,-9.854722791655632,-5.975132053549338 +500,2.243231929027873,9.028347430604606 +501,1.5712474856411065,8.388659822752174 +502,-8.402385914876355,-7.29554952535777 +503,2.20979232884183,10.17928859486892 +504,-10.374426936530666,-8.238281019318837 +505,-8.424020411194533,-6.068413075406102 +506,-10.439177579134729,-8.535513899022584 +507,-0.14858609159112524,9.867483448394383 +508,-3.789324744136027,-7.982485942795915 +509,1.5389893917548934,9.287642786529204 +510,-7.059422065959233,-7.768515167469511 +511,-8.273091794901928,-8.037743039265841 +512,-3.270714016837828,-6.443698650379267 +513,2.3719990096339716,9.488163674871737 +514,-7.494631778718563,-7.473736331488532 +515,3.177760269186437,10.301230325142157 +516,2.4847647075069466,9.044467921894118 +517,0.4747245556943478,9.479618388053982 +518,0.7307387064190303,7.344054451889517 +519,-3.718673556633533,-6.24751242488956 +520,-4.203934495595819,-5.477246935642584 +521,-10.956527197366361,-6.52839446404532 +522,-10.45540151347699,-9.656325885434203 +523,-3.3244805319519335,-6.651382585484791 +524,-8.6399626185148,-7.7781849322487915 +525,-9.31002459640484,-6.1691136682104055 +526,1.099132145651921,7.5104262897193435 +527,2.093363894780156,11.175939460291444 +528,-5.360987834135632,-7.628857486729695 +529,-4.179015108657714,-5.247315172788372 +530,-8.127839977129517,-7.414407795917798 +531,1.158039802878669,9.249295664364052 +532,1.0128605891160136,7.976993569544224 +533,-9.875102589864639,-8.781677789193871 +534,-9.22013369599315,-7.5753618079370195 +535,-9.593881768546728,-7.725725079429768 +536,2.688601106716163,8.004973440849191 +537,4.957863449887785,6.824437855793683 +538,2.027139370684587,8.443151933376354 +539,2.816276129933087,6.992273825369817 +540,-3.703838852513523,-6.2325005307085 +541,-2.281266775913224,-5.443136852045913 
+542,-2.677881761014138,-6.8500830556833785 +543,-3.8185509912833515,-6.770154442633775 +544,-4.474543958990265,-5.364280370240795 +545,3.6302758672036366,7.870772188721467 +546,-0.4954173840642997,8.83354903535833 +547,-4.933280089876337,-5.261445371517363 +548,2.769698704951349,8.316545779971348 +549,-7.440664489021605,-6.180802953856893 +550,-9.473443460332929,-5.919257247274239 +551,0.9554491225982502,10.210935616318974 +552,2.7318748547755574,8.477537572978214 +553,2.9389732030994145,9.34148017129097 +554,-2.5692777445315715,-8.844791418470916 +555,-3.2603924590832447,-7.938713700724147 +556,-9.189116520727595,-8.468832973746789 +557,-4.557200370061501,-7.000110051833846 +558,-10.310033488435808,-7.495409782173115 +559,3.5759520905493143,7.932552255378711 +560,-7.327502792697469,-8.064658951024994 +561,-8.644168033316486,-7.906749723023718 +562,-4.5949896500763865,-5.43463703703466 +563,1.4180919145570554,7.026326398380954 +564,0.45599436901834345,9.294987403447209 +565,-8.929436959993833,-7.480089690122106 +566,1.7175477484811223,8.364498296468984 +567,-5.467083094870308,-6.522829897019783 +568,-2.8260330465980656,-6.314183995088117 +569,2.7994661468285633,8.627035021451887 +570,-3.8130436612767333,-6.798958321567207 +571,-3.607663430217996,-7.5267925631239905 +572,2.228804491245238,8.375852541779631 +573,3.4434183417396698,8.798086800122887 +574,-4.162618879661871,-7.819197266456447 +575,-3.7534828423258375,-5.381187830197566 +576,-7.461643143042459,-9.823978973461502 +577,2.54046667023458,8.086545273577975 +578,-7.650623637676037,-7.801223218648066 +579,-1.9356176107537841,-6.0676767574872805 +580,-8.725560271354123,-7.708164129909977 +581,-8.145985855304046,-6.633385506488888 +582,2.733082524628241,9.760364921676338 +583,-4.42086680978624,-6.895980047202914 +584,-9.943451626876106,-6.986204805483118 +585,2.973971827819502,8.445490492913766 +586,-3.1652790392218155,-7.066419255292364 +587,-7.877341354752501,-7.709253650253533 
+588,1.382700039806736,8.552424788289164 +589,-4.549904887161464,-7.329165511971282 +590,1.562977517993382,10.431232258726215 +591,3.3463392740883675,10.257316238901439 +592,1.8017955090274103,8.107285812647607 +593,-8.06862812548864,-7.566035062135063 +594,-9.730384956127192,-6.640099526564223 +595,-3.7343824996753328,-6.603934480176569 +596,3.0385396810769523,8.263913515332236 +597,-9.600772140819236,-8.457568621982452 +598,-3.787016951021117,-7.062040066347958 +599,-4.256871138865526,-7.020373510207024 diff --git a/Libraries/oneDAL/daal4py_Distributed_Kmeans/data/distributed_data/daal4py_Distributed_Kmeans_2.csv b/Libraries/oneDAL/daal4py_Distributed_Kmeans/data/distributed_data/daal4py_Distributed_Kmeans_2.csv new file mode 100755 index 0000000000..113f76e0eb --- /dev/null +++ b/Libraries/oneDAL/daal4py_Distributed_Kmeans/data/distributed_data/daal4py_Distributed_Kmeans_2.csv @@ -0,0 +1,601 @@ +,0,1 +0,-3.35868895962526,-5.826429539223068 +1,3.4355813134213773,9.14268041217317 +2,2.166199737114292,9.276004069100377 +3,-4.806514075257109,-6.176686810954834 +4,-9.822523562636738,-5.869481622132916 +5,-4.621139272574359,-8.060855434412078 +6,4.4747219357668,8.273629260511495 +7,-9.728530116227626,-8.220807598229582 +8,-2.6866283098463537,-5.020025295814362 +9,1.378767171150755,9.602659656493188 +10,3.867626479978728,10.522258109676596 +11,-10.019643967118993,-8.561204961208864 +12,3.1671710020026347,9.097971021194063 +13,-2.2081924833320143,-5.444843455012178 +14,0.23912793474722327,8.427876901433995 +15,-2.987759541591694,-7.08595589387631 +16,-4.1840159787834565,-5.0702149083565455 +17,-8.486533819214419,-8.667510627344571 +18,-4.494290823461447,-7.150450890392653 +19,1.2558119967323405,8.197899103301312 +20,2.1818213511607967,8.746432335591942 +21,1.3200760815286126,7.41796544553869 +22,1.8904972897281256,8.57257643043655 +23,-3.7193485363950143,-5.240623044135857 +24,-9.457513160532276,-6.671618580818271 +25,-5.231562221340827,-6.12900179615051 
+26,-8.986651931095025,-6.837717212762383 +27,-8.207662597018455,-9.536569360167618 +28,-2.049579670066906,-6.472921400710251 +29,-9.639524600833964,-7.788209927657737 +30,-8.776334793089557,-8.156582541009008 +31,2.737094237923163,8.313448211969183 +32,-4.107258913773014,-7.253203528297377 +33,-7.614650904540343,-7.4661944917992304 +34,-6.427204478513291,-7.869472168097781 +35,1.2433061686957236,9.401338437774866 +36,-8.345174343300046,-7.4416437028036295 +37,2.547516417807357,7.45999636660081 +38,-2.134891928744789,-6.4757578017081014 +39,-9.259617221578104,-7.433731876292357 +40,2.4602912507737704,8.33101477113538 +41,-7.11571685833997,-7.048124365407645 +42,1.9126215443602343,8.681073040147224 +43,-3.9819779238702417,-6.99549817585417 +44,2.146158542856288,7.524796048577623 +45,0.5047319904292489,9.707045212565282 +46,-4.79657171125051,-5.501610863813567 +47,-2.46860991668539,-5.641766825613358 +48,0.6817225428391449,8.578205706512971 +49,-8.100503530554903,-7.807441348293653 +50,-8.650108390576605,-7.182412438427437 +51,-2.9958916237601296,-7.2583161696348695 +52,-3.941428858694723,-5.603165011366886 +53,-6.800720306446685,-8.310034132246175 +54,3.0545534181864817,9.084076988928487 +55,2.0408368304385878,10.36767129196232 +56,-5.485417828951816,-5.240275388904108 +57,3.5000948151612956,6.705805820263015 +58,-7.139992840170567,-9.637652647493104 +59,0.534345563443934,7.106713075523121 +60,-9.487190323558064,-8.643608714628892 +61,-10.946503509367886,-6.564003587764136 +62,-9.746889323072498,-8.275629851067816 +63,-10.751762571354863,-8.650720315364532 +64,-8.181450507546291,-6.911589426422167 +65,-9.963366436417864,-6.471998383848797 +66,1.298658872512382,8.687648446221763 +67,-3.350451117732089,-7.641549588114813 +68,-3.335282658114801,-5.598879748725009 +69,2.4556217150368376,8.58131342744841 +70,-9.2643768456267,-7.785201070272984 +71,2.899391735480882,7.911056585495816 +72,-7.942368048664481,-6.5356540345559635 +73,-2.651999504735985,-6.722436935826184 
+74,1.7357972633743375,8.854520143412076 +75,-8.82523623566712,-7.443574374185343 +76,1.5658105126410122,8.897134555679377 +77,1.0846406534215636,7.805574898153825 +78,-6.2598035016727085,-7.191736037479638 +79,-8.0339143487797,-10.148106920133172 +80,-5.3089974641738005,-5.60796719232985 +81,0.5136605842533299,10.77829733806304 +82,-10.820330406425374,-6.8756069145675855 +83,-3.51663431135422,-7.8431676677974655 +84,-6.004239004261249,-9.01394660786411 +85,-2.6056363159115494,-7.080218521917058 +86,1.2708644552463981,8.280037053421355 +87,2.5750902829464914,10.762453016509184 +88,-2.310851729093784,-8.213899267416405 +89,-7.739093835667072,-6.693967868173796 +90,-3.5167808443889865,-5.732994482494412 +91,-4.625341959670964,-4.96359984148821 +92,-8.008935739663128,-7.917812022983215 +93,-4.035125440587534,-6.154899480363435 +94,3.266735037731962,10.083283351526973 +95,-9.321868948210108,-5.880898687288509 +96,-7.619112400661499,-8.278278629099495 +97,-7.916933316243835,-8.028861671937905 +98,-2.8922942877836904,-8.467058564910289 +99,0.6427336948274962,7.494782339842734 +100,-2.591894090735525,-5.623507523030862 +101,-4.233901683179618,-7.669608911240129 +102,4.077130089568358,9.739324028463148 +103,-3.7865516046932344,-7.301585981430177 +104,1.9373966198229344,9.442135245411798 +105,2.351472667527091,10.964720984200605 +106,2.259612363924928,8.666073474582355 +107,-3.4241252242756253,-6.80999732837145 +108,-8.484768711332697,-7.2980027089825175 +109,-8.305629765600244,-8.941137471494109 +110,1.2529692312854797,8.604391379735322 +111,-11.35119724791917,-8.301555544464481 +112,2.462216783210357,7.139435610827717 +113,-8.882140125130165,-5.961494357428298 +114,0.946523796780079,8.181139910228348 +115,-2.4743126495981844,-7.727722592953043 +116,-9.039188822190084,-8.943038932209545 +117,-4.306513385532167,-7.996291047500129 +118,1.7809907796220739,7.885617616039458 +119,-5.052854268602579,-6.436944776620838 +120,1.4857201203970574,10.11517675982338 
+121,1.6593339300096615,6.538529282746874 +122,-3.301320021315037,-4.892692836144352 +123,2.2952744146262902,9.53089131246139 +124,-3.633765823556005,-6.310258913148314 +125,-7.461306205677059,-9.183803206577624 +126,2.1180913009139815,9.822597368237922 +127,-10.482056661389045,-6.830206837534433 +128,2.6557271763292967,10.012672354815626 +129,-3.6887394754006624,-6.681448861129166 +130,2.0526122706345244,10.126031120554 +131,-9.841517196358051,-7.642792994803499 +132,-0.1909503321475008,8.836758159529273 +133,-8.20962763862538,-8.663168824531486 +134,-3.082675678680314,-6.264927899309817 +135,1.7012279320561425,9.349539782977981 +136,-8.473941725240367,-6.192204136700906 +137,2.503751749735985,7.062362532878443 +138,-3.220248568089246,-8.12030731635864 +139,1.9310710128544837,10.019162630939851 +140,-9.433299379879964,-7.916117032617272 +141,-9.398968398973636,-9.408774226495124 +142,1.274811236642502,7.670054827039406 +143,-8.686273070305473,-8.148470148564801 +144,3.0444060537617714,10.36894670129741 +145,-8.787124666481784,-7.019780149417274 +146,-5.2403238395313725,-5.518098459118206 +147,-8.547542232490287,-7.5646976527021765 +148,-1.294372186921871,-5.486620013420126 +149,-9.931709264181752,-7.707463995343656 +150,1.8221329951589387,8.909619351308766 +151,-5.295664816085174,-7.08816085358701 +152,1.6944517864829436,8.622139688485015 +153,-9.849324736937627,-7.085781173605372 +154,-2.855985363889836,-6.579883060827188 +155,2.352132228282092,10.730356750386534 +156,1.772757926613877,7.900294244917435 +157,-3.270863798908827,-5.019187468844261 +158,1.528900578257351,9.246368843572169 +159,2.2320251542494427,11.008424116757629 +160,-10.175365813198393,-5.788379745910216 +161,0.5010976081720266,10.245478426775488 +162,-7.32001007404374,-6.419661704988274 +163,-4.721250239238775,-6.912615087252899 +164,-4.101986290415056,-7.334289906839879 +165,1.7123704438732543,9.443500763688121 +166,-9.86489135797329,-8.548428587904317 +167,-9.879669671065848,-6.944854821066275 
+168,2.4445212219454864,9.559402984365923 +169,-3.852292461664126,-8.2681335481681 +170,0.8967997675511092,8.949548310125508 +171,-2.721995139277326,-6.09361457876869 +172,0.6438945543625096,8.386403782397283 +173,-7.9521654231971555,-8.392585667208966 +174,-3.9798384775388995,-7.7644599483475005 +175,3.054869087466681,7.650740654624071 +176,-8.917177539147152,-6.788692998439901 +177,-4.199315749342823,-6.649741955139275 +178,1.8446071377761974,8.542879630314024 +179,-10.370865833658097,-7.678558090112394 +180,-8.696610248438073,-7.80683884438522 +181,-4.591504741070676,-7.062977197077918 +182,-8.053309769515488,-7.328243543115297 +183,-4.3393288903737925,-5.359712477759928 +184,-2.6995807753878003,-6.716037188715158 +185,1.5722319880345381,8.81760158278003 +186,-5.197935388304108,-6.586627943659147 +187,1.4161911903308217,10.304612024805262 +188,-9.847186133153695,-7.6066732601412665 +189,-6.231331539324444,-7.292814833581683 +190,-3.4376902697810734,-5.6248900702480285 +191,-9.742391009540542,-5.493005459363653 +192,3.1544127997119564,8.969868118655281 +193,-3.3628339167225594,-6.5962869153545505 +194,0.4407989308348059,9.3750109172608 +195,-8.045589824652708,-7.336897559864184 +196,2.192133258753018,7.521017403589007 +197,-8.028420047145206,-7.12446546477376 +198,1.356756615707675,9.12045201318182 +199,1.7822482709057093,8.345210645186462 +200,-10.291191739926187,-6.099503854703206 +201,2.1553141492470718,10.304125715217145 +202,-3.891268021938617,-6.330393265614692 +203,-8.142559515236046,-7.651378613926669 +204,-5.414873889952844,-5.667608738502075 +205,-3.90285132381985,-7.021129722177164 +206,-7.839166310122564,-6.713653310984322 +207,-2.000718895530837,-4.8561327476159235 +208,-5.053225707353607,-4.665893734982673 +209,2.5280984151345662,10.282375528861602 +210,2.178031020209426,9.03074701038035 +211,2.2896565607400836,6.956175652238886 +212,-9.909257398886236,-8.025783287508078 +213,-2.7373874721053593,-8.129760447528312 
+214,-9.470339427494082,-8.389456711580193 +215,-3.9965672509624453,-6.495812862881321 +216,-8.395219635108448,-7.834675563116395 +217,-4.941986125961326,-6.834141775734095 +218,2.2327108302998697,8.41414362862347 +219,2.287295859451085,8.887644780143772 +220,-7.845944624320129,-7.190703571009132 +221,1.5401686097423166,9.790724893103178 +222,-7.67880957172732,-8.133117602959778 +223,2.1861841301846354,8.78196937599449 +224,-4.238143352008502,-7.671738818280288 +225,-3.0965498171287154,-6.071726245923383 +226,-11.004835607039393,-8.716848062693376 +227,-9.175832646629356,-7.253346900483777 +228,2.1286414655151087,9.436285021740265 +229,-3.2460956238234475,-6.1200744731642 +230,1.6122318915319978,8.303560835915217 +231,-8.297627223448073,-8.98344888551434 +232,3.443593743589413,8.376205894418318 +233,-8.972835888261347,-7.907032371222781 +234,-4.819037441441789,-8.31956758929415 +235,2.7363876771241573,7.9143227643855 +236,-8.732282408784988,-8.40806966947132 +237,-3.270978516264587,-6.548173676954751 +238,1.0110716985856187,8.395317366377705 +239,-3.567051433094067,-5.961931965707605 +240,-8.974193176246727,-6.206988063769661 +241,-9.043580853434083,-7.578510466567455 +242,-9.825069377691243,-7.571454147497201 +243,-9.959202095503522,-7.757246268235972 +244,1.3112402040228384,8.708110207295846 +245,2.344417307917217,9.369574003359213 +246,2.1960978718119257,8.24373728738131 +247,-3.2994435945185496,-7.5167229946344705 +248,-2.9312612310190773,-5.86448053055016 +249,-5.6669205411466494,-7.73339528829164 +250,-10.773118997012762,-6.988895450707375 +251,0.44480349376541417,6.9062779746970735 +252,-4.829257856989387,-7.204356334213447 +253,-8.677214638678043,-8.911973906763063 +254,-4.341732055023909,-7.2440079453847215 +255,1.4325652748070907,8.989269695787607 +256,-10.234658135953211,-6.145124733151453 +257,3.8551057478274746,8.970969603145495 +258,1.5877285768403273,8.231664001979325 +259,-2.9810915170909293,-6.522368740685841 
+260,1.1361107128315253,9.256875908157047 +261,-4.463584274422763,-7.337956883380673 +262,-2.221673494768367,-6.61700999368102 +263,-2.5272851092134845,-7.245398720606795 +264,-4.626595662051341,-8.111892439896705 +265,-2.4010013889885258,-6.60762504208151 +266,-10.453008187368908,-9.277485875834412 +267,0.9807962426766026,8.525032007539878 +268,-5.529838919610108,-6.46897247791777 +269,-2.7850067892399637,-7.79461325635951 +270,-9.016728365455332,-6.736368845268928 +271,0.19613786668944466,9.145783871679404 +272,2.6158780164924567,10.526752836282698 +273,0.8735491475411368,8.748009691123961 +274,-8.521798633270743,-8.365648072531284 +275,-0.20211009113790102,9.45960548467821 +276,2.094411277506846,8.581819267882855 +277,-4.84706997923652,-5.438319811144145 +278,1.533227470384584,8.891516033673929 +279,-9.345243191550866,-8.953088415079424 +280,2.1968837683848186,9.357393775028457 +281,1.9676360256255412,7.905775199259102 +282,2.7666406810494615,8.893172489221879 +283,1.1123331957549405,7.089192550064636 +284,-9.019325251132761,-7.907631659489907 +285,-2.893192161876666,-6.814903945239156 +286,-3.0269458041799973,-6.059131577331172 +287,2.570401696944053,8.305842017624176 +288,1.5655854042904174,8.74156655418919 +289,1.0628175690130237,9.762262480977503 +290,-10.840015823233166,-8.682333870606595 +291,-4.373218793921528,-7.171230408886504 +292,-3.2000459048433614,-8.783724031606651 +293,0.856161136022519,8.920792062660606 +294,1.1841844787645357,9.237269039840463 +295,-7.439762544783075,-7.479858626024707 +296,-4.227927571641294,-6.936506216069389 +297,-3.2625108713404107,-6.4710368562105485 +298,-2.5931960537310177,-6.818546536603943 +299,-7.710660587588098,-7.139909842153512 +300,-2.6538339208759245,-6.528345062073545 +301,-3.645592242498573,-5.712865136712878 +302,1.7976882615408294,7.681012266921148 +303,1.8957060092820301,8.948752372953674 +304,-10.209732995673647,-7.379583885916444 +305,2.4898870588344018,9.67522366010307 
+306,-4.656201688465136,-6.4040923032719395 +307,-4.178020157153271,-4.263163447068377 +308,2.947568746150873,10.41016287956812 +309,-10.311337650813531,-7.7016387455981565 +310,2.899643832061082,9.553846364823723 +311,3.4031947099000304,6.793837809132251 +312,0.20789685988955853,8.703813212813328 +313,-10.342936071131309,-7.335644866798318 +314,-3.837916986133941,-5.64697648617353 +315,-8.946377468568006,-7.6440966476104535 +316,2.2023859840552835,7.65246528379812 +317,-7.944711603608673,-8.52101235152373 +318,-6.887057552825117,-8.421060347430048 +319,-9.144718100460349,-8.138501182562196 +320,-8.69398046588213,-6.733942468119798 +321,-8.605288738024067,-5.8113180142678615 +322,-3.003901781953935,-6.678674367936518 +323,-4.069352394765035,-5.498412068615528 +324,1.3862411272877035,8.051730244382817 +325,0.649524922637946,8.195130900829021 +326,-5.551270778837715,-5.688935361303699 +327,-7.365694900240971,-6.710256401860435 +328,-3.801798115837755,-5.421961104436759 +329,2.0087239251474527,9.132767223361697 +330,-2.7804797280226135,-6.567233465468077 +331,2.782014961246936,10.009510594547592 +332,-2.3492644502138282,-7.350733351497415 +333,-4.206831963234683,-3.4421170183703773 +334,-9.604973346130063,-7.731200202743458 +335,-5.004073768118336,-6.70213921936662 +336,-1.8628613842403539,-6.609383400201124 +337,-3.952870764311914,-6.525559378756727 +338,-0.08365741281414563,8.927430260113754 +339,1.7542838569691432,8.24173217837714 +340,-3.7262691189584594,-7.652280786853464 +341,1.5580318808749487,8.603542840151066 +342,-2.371466118704887,-7.187780307497872 +343,0.8290263218300455,8.252906060095047 +344,-10.432328322940055,-9.203523570365311 +345,-4.259914492517776,-6.565955647408584 +346,-10.774445675551492,-8.899759099508032 +347,-7.809371437533612,-5.966188204402244 +348,-2.6606718377353547,-8.53042766347821 +349,-8.287999454561152,-7.9341340655673305 +350,2.86466120959558,8.883999015070891 +351,-3.102741786258152,-6.214305342528414 
+352,1.7402868823328423,8.066138050333372 +353,-8.058843812732409,-6.082200097858894 +354,-10.538382508045645,-9.15746793794495 +355,-9.713297716658428,-7.184332169664756 +356,-10.437879087402509,-5.268573955836219 +357,-2.734892534740736,-5.98875513800358 +358,0.5226987795344917,7.294056764420545 +359,-3.0607384898678216,-7.7585968388597655 +360,1.6936963900367203,9.665414464515292 +361,0.9202265492009503,9.447548712887738 +362,1.3890555748632991,6.0066285825575925 +363,-8.874206745756213,-8.280187276924956 +364,0.5136443196089762,8.817861363688756 +365,-4.775571604113167,-7.198194850532794 +366,-3.341858478491677,-8.101998132270335 +367,-7.99367565381126,-8.882547523354486 +368,1.9337420700556978,10.01666620357398 +369,-10.395557819411785,-7.508024562561236 +370,-8.55702189527164,-5.68806053209533 +371,-5.13844879794808,-6.223853336672088 +372,-8.197201783076405,-9.215423475535736 +373,-8.278897263977411,-8.55048452631327 +374,-8.66197140398328,-8.248117237229634 +375,-3.006743479262847,-5.93423353306401 +376,-8.267782020725011,-7.778194164911808 +377,-5.083553805875464,-6.016886081312889 +378,-3.9407723920511106,-6.464315298228791 +379,-7.083564965305358,-8.615057127161643 +380,-3.90779724587636,-6.604006945277915 +381,-10.7012857676038,-7.568656886403618 +382,-4.116603353431634,-6.150041930350558 +383,-8.278883798618557,-6.804438950715207 +384,-2.827373858118777,-4.551046282794583 +385,2.158307636228448,7.145022958514378 +386,-8.137932040124426,-7.806895797410348 +387,-11.131194018210197,-8.413216117496031 +388,-5.788425975700523,-7.953896326150473 +389,-4.620097654178233,-6.310484872527448 +390,-8.478880483642358,-9.204281358114084 +391,3.504208761427533,9.36879171541479 +392,-2.926987256481567,-5.491045552832883 +393,-9.476155252156401,-7.701364445205184 +394,2.229614102651407,7.71008891054642 +395,-8.30997775631249,-6.911040660872131 +396,-4.71974410585834,-6.38576628604139 +397,-8.056280287222378,-9.280595305783406 
+398,-3.4225775155743694,-7.878439916434665 +399,-2.0853909321871917,-6.879355180933256 +400,-9.319525183116145,-6.8769547083138844 +401,-8.245983352751873,-8.321261728811903 +402,-3.3714298668646565,-5.950275017858789 +403,-9.00988343723979,-9.298496814665032 +404,-4.10592381024369,-5.918628469975301 +405,2.5936068911339762,8.76714571096227 +406,1.715426613235632,8.545618375337266 +407,1.8386101193460516,11.151159191103638 +408,3.2441170619894777,6.416826938113145 +409,1.191626983310161,10.46818371366243 +410,1.6430030126260013,7.929862784787724 +411,-3.038543958519568,-7.463916995199537 +412,1.3201160157782514,7.564886953060057 +413,2.6514050368865023,9.864242492056405 +414,-7.969901511981649,-6.534473589445678 +415,-8.518470244643877,-6.898948871818382 +416,1.7977746647612987,8.468637650035655 +417,-8.376124454197242,-9.028969361919913 +418,-9.122897845891714,-8.239785318173022 +419,-5.289802430217316,-5.992438020441808 +420,-3.635438082896571,-5.730273861471589 +421,-9.77253536308097,-8.261125493159623 +422,-3.4221666763669836,-7.268500499835926 +423,-9.611020882151013,-8.769076789301296 +424,-11.325229986370699,-5.705853262440593 +425,-3.365929740057175,-5.644627040063918 +426,-4.900398230837069,-7.061212971349762 +427,-10.171859893388046,-8.275274941644462 +428,-4.575123981001321,-6.452261305928669 +429,-8.135563136348908,-7.716422477177896 +430,3.7322331465984124,5.422108729599827 +431,-9.29526213846877,-7.626578669444718 +432,-4.237566821627697,-7.55005737896009 +433,-9.183361948844631,-7.570684255292109 +434,2.7097980229426373,8.43958518362943 +435,-9.192923549857293,-7.53471946498263 +436,-9.67752430211944,-7.410216574460519 +437,-7.967875902628163,-7.559640794070247 +438,-3.3319935812854173,-8.396814624265595 +439,-3.5940064636472444,-5.374015552057795 +440,-1.5492094557452,-7.17812214097376 +441,3.2600412739553857,8.651218199782074 +442,-10.184282349192125,-6.604160884644846 +443,-3.93421035676419,-8.314076821804923 
+444,-1.7680480218377566,-6.7506327668413215 +445,-8.407416462813392,-6.185746288386427 +446,-3.332866268516075,-7.911213684246127 +447,-8.547386855384353,-6.619893671873742 +448,0.6418789264448461,7.268533137555316 +449,-3.736679259883213,-7.429561167168869 +450,-2.4183282579016465,-5.5442011571539 +451,-3.7143354613884396,-4.9109581342459 +452,-8.653352329841526,-6.758439737388211 +453,-4.817456185387494,-6.298423954607866 +454,-9.413912300534099,-7.663639080709745 +455,-4.793771934490004,-7.060045755486593 +456,-7.941843972547384,-9.594954224784123 +457,-3.902419674554519,-4.631760045687949 +458,-3.9825829521920357,-6.364153621508403 +459,0.8621510490630231,7.668541143487601 +460,-6.650141102609025,-8.218777499609105 +461,-3.8472827531400733,-6.597442445907043 +462,-3.569322199391855,-5.9645134407216664 +463,-8.146286308000464,-6.128937296910369 +464,-9.750535050249626,-8.51696139122317 +465,1.7159090210086831,8.369375333648767 +466,-11.121195487486151,-8.788963931131986 +467,-4.074627342273818,-5.891460062736109 +468,-3.0347257670849856,-6.0028152286746925 +469,-9.186386987296094,-7.920976668130999 +470,-6.7905433465090255,-6.3612687635612355 +471,-5.554880340398808,-6.636631911492183 +472,-3.4352895448772296,-5.731896371561744 +473,-10.838899284430546,-7.152570323659559 +474,-8.54860352122666,-6.5216233759320685 +475,3.161878344164532,9.347916489345463 +476,2.9666892258902706,9.643799186052602 +477,0.23182081567599044,9.114102444020164 +478,-8.055967894993012,-7.482813770441974 +479,0.36569616829843854,8.39705531037486 +480,-8.314137471818114,-8.158984139838683 +481,-3.9219865911561036,-6.024563898363107 +482,-1.8470686606663926,-6.064940124304251 +483,-10.010005981707781,-5.847878391023799 +484,2.0841261866243816,9.752415577568359 +485,1.2196490622390979,9.044615878836572 +486,1.5556686737158538,7.4190159791173365 +487,-5.169309568731435,-6.464305198493605 +488,0.8678704288717831,7.624208081599757 +489,-2.8456583331275023,-8.04100291426108 
+490,-2.8564600285075823,-7.1373502752514515 +491,0.8086140671702274,8.262778160520003 +492,-3.625017072198839,-7.548857262168908 +493,-5.204686260531217,-4.689976310520366 +494,-8.40229914634969,-5.660631753126961 +495,-9.307312585572348,-7.153677154915194 +496,2.5529579725720186,8.299066074733663 +497,-4.41328800219494,-8.01017206969573 +498,-2.7960777128921808,-6.852967888455779 +499,-2.5208586601654543,-6.504870551634729 +500,-4.691768444229939,-7.613347696500981 +501,-3.2757809197875303,-5.590685062433759 +502,-4.250152895681304,-7.285967822690733 +503,-9.204293263845981,-9.084210002503024 +504,-3.0196361833879566,-7.004943430493599 +505,-8.562991571522256,-7.171363619659097 +506,2.7529256487972065,7.488883167233098 +507,1.830505520850492,8.892999330798872 +508,-8.161948836267698,-9.532139285875404 +509,2.7431477646713143,7.973300601077908 +510,-3.5069453753898236,-6.23879032061062 +511,-8.317256487913806,-6.618204024712531 +512,2.300759448648913,9.528896374784418 +513,-2.8552241932608386,-7.267866191046745 +514,-2.7046388629725895,-7.873394665205723 +515,1.0449359642174199,9.346094109846385 +516,-3.0592566171782027,-7.785107890662386 +517,-9.439183598162222,-8.093294870053596 +518,-3.68735504474474,-7.225645765490254 +519,-3.5596360828045785,-7.18260924418626 +520,-2.973973904340412,-5.705324949803886 +521,1.7716757659658577,8.761365543878778 +522,-9.390697751969299,-9.450181516627964 +523,1.6123347926763898,9.775468320939863 +524,1.8646981608070459,7.93117601131186 +525,-9.596169716821008,-8.178547610350808 +526,-8.541383684734354,-7.842370307620071 +527,1.1131363473263765,8.825753133445415 +528,-10.244261040786068,-8.081342842374916 +529,-8.579807002681788,-7.135852360203103 +530,1.763920120447527,9.845449563376503 +531,-5.51546353846694,-6.943804429871649 +532,-9.14405296164511,-8.195695340516782 +533,-9.035974710668595,-7.6202987380717735 +534,-3.2615633531068053,-6.701705025156346 +535,1.709937162564638,7.14722221654483 
+536,-2.245666954425036,-6.883861614887863 +537,2.3677130438104808,9.106566897279938 +538,-9.458167693002014,-8.503042821677347 +539,-4.076697391592253,-8.019789647543242 +540,-9.036595223062285,-8.287709453993642 +541,0.6758589143112543,7.8500170460493335 +542,-3.75132705779662,-7.216694471611288 +543,-9.272126552693619,-8.548950171460847 +544,-9.554854832314836,-8.917633282884536 +545,-3.0596964259952006,-5.2968119716487685 +546,-9.802026620811418,-8.074413213875708 +547,2.2378712511043544,8.327043087398575 +548,-9.117971064828826,-5.667870797754498 +549,-8.569344288306812,-8.101000886809317 +550,0.9240764673458738,9.528352749876202 +551,-9.82004919711966,-5.70373762491799 +552,-7.630307522131641,-6.017151383129318 +553,-2.9524525381669573,-7.570534768535014 +554,-3.4125128861756826,-8.645715848727079 +555,-10.625270472104175,-8.931403459118163 +556,-4.694057475120292,-7.911445423833883 +557,-2.25619832583494,-6.65809120171658 +558,-9.2780474833441,-5.696778701875973 +559,-3.6784846740185855,-6.082444716669731 +560,-3.4293429064006276,-5.847821903492798 +561,1.9439709651388997,8.648073360413566 +562,1.4819223967537078,8.035868707853556 +563,-3.7699715009405357,-5.292806218828125 +564,-4.877792159215375,-6.009914407133835 +565,-8.803381135351215,-8.33315553434123 +566,1.0284225741506225,8.750446127211628 +567,-4.105377571728074,-7.401860844543055 +568,-9.685248150269778,-6.149500712625501 +569,-8.724619020013902,-6.343281306650992 +570,1.2889792874675887,9.189494923898836 +571,-3.7788197836319144,-6.833095678512737 +572,-9.522780233052375,-8.018520939310838 +573,0.017303362231853914,11.12668688502151 +574,-7.870209813144006,-7.871340705112821 +575,1.5463790222165257,5.981627896367072 +576,-9.50850094688113,-7.4139405643272625 +577,1.9962100293771334,8.70032989112369 +578,-3.6308999273375187,-7.1807564718528285 +579,-3.0599072191610253,-5.156944837721683 +580,-4.015838971377803,-5.628626943136464 +581,-4.3059399615101945,-7.0187684115528794 
+582,-7.69025302026296,-7.981267867885812 +583,-9.25381132352528,-7.327874304532358 +584,-3.681895830814411,-6.992466985298881 +585,-9.93429116381224,-7.552250100807363 +586,-2.904660034160202,-7.175008970106031 +587,1.141690863237407,8.264628154046255 +588,-8.021619535904726,-7.629673175480269 +589,-9.634212614246236,-8.249125800630479 +590,2.102157219741975,8.860344265071555 +591,-7.817941391703549,-8.641911617703295 +592,-10.291511817990084,-7.748716057362189 +593,-2.355180558375901,-5.002047560018639 +594,-4.122747380585646,-6.653506344669552 +595,-4.258002300759403,-8.079546417995866 +596,2.894269851113211,6.4558135431821775 +597,1.4200834497833223,9.27022793492442 +598,1.1775973337547503,8.944446025590066 +599,-3.6292799609187383,-6.561517231896472 diff --git a/Libraries/oneDAL/daal4py_Distributed_Kmeans/data/distributed_data/daal4py_Distributed_Kmeans_3.csv b/Libraries/oneDAL/daal4py_Distributed_Kmeans/data/distributed_data/daal4py_Distributed_Kmeans_3.csv new file mode 100755 index 0000000000..ffd208447d --- /dev/null +++ b/Libraries/oneDAL/daal4py_Distributed_Kmeans/data/distributed_data/daal4py_Distributed_Kmeans_3.csv @@ -0,0 +1,601 @@ +,0,1 +0,1.2255376526898032,9.141479217773881 +1,-4.297337043474307,-5.530916545335797 +2,-3.6087115377420087,-6.516947027948974 +3,1.7283245822395865,7.644297041081181 +4,2.394681190268282,9.679520106012909 +5,-8.540320716098458,-7.074720415703683 +6,1.5467345165080124,10.016750892066021 +7,1.116032103290741,7.389600128038769 +8,-8.564960747610074,-8.004840738257776 +9,-4.30699470103154,-7.210222308396672 +10,-8.324455580440977,-7.193745420077761 +11,-3.055746213116298,-5.738151226380286 +12,1.564107524552199,10.212558601072592 +13,0.3278426725521013,7.717067491079936 +14,-8.98260775540197,-8.445947915664142 +15,-8.148884610857909,-8.249832172954447 +16,-8.616360734466356,-7.991086807340608 +17,-4.995125679553251,-4.805418699792586 +18,-10.043205855988308,-8.38198798465999 +19,-5.117321263158294,-6.856273768969385 
+20,1.376353630018705,9.192645073591452 +21,-4.251693900241542,-7.526106395251568 +22,-2.93596205716008,-7.236746464333438 +23,-8.808958437261914,-8.455460559549115 +24,-9.847650729901957,-7.851876184289234 +25,-11.048039228950888,-6.60393674080736 +26,-3.964409490913115,-5.299418515912615 +27,-7.0493565263673155,-8.740114248978552 +28,2.263311060790778,9.849075294679976 +29,-3.8024901458716647,-7.117190913344736 +30,-4.800964446022614,-7.504211423063539 +31,-8.870192169247716,-8.672027956256194 +32,-3.8386174296796276,-8.499766712668153 +33,2.4596144288951614,9.826078306198113 +34,-3.3803609919209965,-6.935355354171216 +35,-8.600888421369499,-8.081225402937271 +36,1.6990780728593955,8.026541885792696 +37,2.0234198603467184,8.078938358225274 +38,-7.861223420539712,-7.970030131717135 +39,-8.97420790996449,-8.139796716130322 +40,-4.274138163785766,-7.218007933270192 +41,-8.853199100620417,-8.693668385815021 +42,0.6026265514398386,10.98343483713762 +43,2.433240364895425,9.059774694791999 +44,-9.721084263354559,-6.5643821056343405 +45,-10.084805290383525,-6.870937627701416 +46,4.0936817036834015,8.872937261702587 +47,1.6929446489697524,8.304053977767918 +48,-2.8273918317995848,-5.174707032406225 +49,-3.6377369973212033,-5.981861133049716 +50,-9.127569607956952,-6.626326837736638 +51,-10.248572831708822,-7.548645388130816 +52,-7.855830617376127,-7.904014064483153 +53,-3.445179956310673,-6.464140555832437 +54,-2.6452978926620245,-6.439318643401205 +55,-6.39087632984327,-6.928394253403045 +56,3.5188547547156386,7.551672632060567 +57,-7.855783363668796,-8.094237123526709 +58,2.615776219644124,8.843205260668764 +59,-5.601959336375614,-7.439435156164771 +60,-8.87656188186118,-9.647791398501298 +61,-8.987652698755243,-8.313103579761762 +62,2.7378795714787665,8.846531747222823 +63,-7.108610717006393,-9.030089038622796 +64,-1.8963294779383923,-8.189264029877402 +65,4.071599487948465,7.716846493830238 +66,-3.6658654047687795,-7.47426414118374 
+67,-4.035359477565178,-7.487029436292918 +68,-8.810170608211632,-8.379134703981288 +69,-8.20797702039906,-9.09686600955095 +70,-5.08018104907724,-7.2637114806271015 +71,0.6351331740071233,8.224606419467982 +72,-4.47822711506591,-5.352695184484038 +73,2.358168656987021,8.308710197655335 +74,1.004613507660405,9.410385162767355 +75,-9.361705481884158,-7.184962903676741 +76,-4.282438839606894,-5.53582099677129 +77,-4.1049753854859,-6.322210169062712 +78,-7.546042903965684,-8.210753335220534 +79,2.84173196981702,8.716906136750826 +80,-3.7536901854246327,-6.72560061365912 +81,-10.667769318904304,-7.643966842339937 +82,0.8150704800004155,7.6566085308868645 +83,-8.356933989588283,-8.589906034300064 +84,-8.801307660780632,-7.621502234241935 +85,-4.67280437236893,-7.0337426395333065 +86,-3.495159526615141,-7.7823625361508295 +87,2.0599669727110306,10.824626023348948 +88,-1.761515057182471,-8.248027717315765 +89,-9.092492983616513,-8.237898770994937 +90,-3.305040516751636,-7.26504640263574 +91,2.1227809787420706,9.194304511259828 +92,-3.145931808820589,-6.229095802364749 +93,2.889743289938168,8.929504976806188 +94,2.018863059789951,8.220482210983691 +95,-3.2196229174127033,-7.532972081961835 +96,-9.180006612215056,-7.060068395688889 +97,-7.986523717432862,-8.772168767696355 +98,-9.168083705283559,-6.822850835906491 +99,1.289775580475062,9.70271783585797 +100,1.2740322978213001,8.292177837742086 +101,-8.916527211291001,-7.656106436204208 +102,-9.700898376071635,-8.032415703940435 +103,0.9669161706345597,9.634344618512687 +104,-9.67270102358252,-6.908277651580308 +105,-9.564463430382272,-9.710466191751374 +106,2.2969425258369314,8.987649262904931 +107,-7.762023185100588,-6.743135723883691 +108,-9.255459961544892,-7.782531060262364 +109,-4.273137189588981,-6.127968184795229 +110,-3.7508291438510564,-7.1796650264303326 +111,-8.648220880324805,-7.172900439017217 +112,-8.658333970395889,-8.56713734626757 +113,-5.733518920545488,-5.010252542754743 
+114,-2.3741665613138037,-6.357738603600945 +115,-1.9856093324216508,-7.617195339401777 +116,-6.704566032535892,-8.238334916994258 +117,-7.914571748360948,-8.921005996340748 +118,-7.58494093314669,-6.710379996663295 +119,-4.006968138276497,-7.798415227381543 +120,-10.00761330753919,-9.242893434719528 +121,-1.7128538688814134,-8.601998179774073 +122,1.2167337031493959,9.076757154496596 +123,1.6447608096299158,8.483163215726453 +124,-7.218522355872036,-6.5669042952039565 +125,-8.51184088492603,-6.910923174986802 +126,-3.1001125221811203,-5.295113344522024 +127,-8.73963648128209,-7.186510393418622 +128,-9.09764611141698,-7.534170213328472 +129,-9.780761540280578,-7.742981207657383 +130,-9.717247650860006,-8.77608768979335 +131,0.3003464179203046,8.95041049956845 +132,-3.5149896816997006,-7.909518081993374 +133,-7.963579115398975,-11.103666868682154 +134,-2.6241211231232815,-7.423013872519332 +135,0.8682862034790112,6.244997188436713 +136,-9.043625473149135,-6.909556510217293 +137,-8.730545963953597,-6.167287236441934 +138,2.836438826504372,7.731977744231563 +139,-10.136257512743747,-8.061638998745739 +140,2.5255796786313267,9.989763742863861 +141,-7.728567526641721,-8.516688228325041 +142,0.10498352981338144,6.936041644039058 +143,-9.878483086701534,-6.825949923646933 +144,2.1911668619430684,8.681924891111366 +145,-9.297423063665612,-7.006348028486722 +146,-7.399923203686196,-11.741207542012472 +147,-9.851057134480019,-7.664685319105955 +148,-5.821319298892842,-6.531365455033449 +149,-9.088485188036124,-7.266914238231228 +150,-10.508127705680996,-7.2971293307035925 +151,-2.776680306767184,-6.040444822335491 +152,-8.397895528644842,-8.939148856577797 +153,-2.6224385814925615,-5.942589208384663 +154,0.7126631210496013,8.427374154516944 +155,0.9069792571976608,9.431652824626497 +156,-4.941144074728792,-6.913310181617657 +157,-8.388660616639175,-9.781674687896398 +158,-10.15866878410844,-6.086199496080454 +159,-5.280330752512068,-5.8711826568517775 
+160,-8.943632752777454,-9.021959929498285 +161,-4.2741222237074386,-7.468014406499503 +162,-8.111790708521093,-7.829244998012636 +163,0.034947129092518514,9.599563934157821 +164,-9.520977354855793,-8.34632175374682 +165,-7.157739814914419,-6.065387153893702 +166,-5.226995045988889,-6.111376787056115 +167,-8.22365310520065,-8.570490114550068 +168,-3.483702010774605,-8.258454646969218 +169,-4.675132818096483,-6.718383881062376 +170,0.8076322417226721,8.275565136333183 +171,-7.8602699422307944,-7.333797725105611 +172,1.979964867428537,9.865183123448809 +173,-7.947454000060463,-7.646911308269281 +174,-5.331960089565768,-6.341744172920323 +175,1.7885922368246647,8.340534722885964 +176,2.787746405882677,8.139148148558975 +177,-9.151914267743374,-7.569577432408877 +178,-9.129791842784071,-7.265252727817658 +179,-8.366843771779305,-6.496401516640613 +180,-7.1560198551097045,-7.7684193348064605 +181,-9.492772888554155,-7.319890377024424 +182,0.6053803088265532,8.075801111134975 +183,1.6434165607683415,10.271458125764944 +184,-2.739547260018664,-4.138099443059702 +185,-9.7037093490842,-8.790361694936095 +186,-2.7306182130527286,-7.208187200163216 +187,-8.134892576819396,-7.520714708050233 +188,-8.267745100216402,-8.385102540138817 +189,3.3415830473073695,8.734171325609722 +190,2.9311815000005996,9.143119401290065 +191,2.2860882129071807,7.846461236250382 +192,-3.6644673202519136,-6.438444904529135 +193,-7.523276825143395,-7.255807730935022 +194,1.868794551266115,9.76318232418679 +195,-3.296871001299072,-7.719880058593299 +196,1.5442319254883006,9.529188740813856 +197,2.3179624773871534,11.270953386109198 +198,-3.7262983945769776,-6.324564664256947 +199,-4.135269049518808,-5.070719500945035 +200,-3.9373005080229473,-5.689290940444298 +201,-7.205346385928806,-7.311384820544484 +202,-3.68907443689269,-9.752269221376734 +203,-4.685004081304692,-7.368110975680209 +204,-8.763720747411547,-9.290122994952846 +205,-8.872612765848631,-7.338108680713353 
+206,-3.3042196564355644,-6.641155245652315 +207,-4.123653143016889,-5.644872475075601 +208,-4.766993102823317,-8.156348503780505 +209,-3.5700447726245157,-5.8794728348385785 +210,-4.132984951847543,-8.068499111058316 +211,-6.622827538964524,-7.551504765031613 +212,3.2137070158351415,7.4433402642109625 +213,-9.77406291453185,-8.293643281900614 +214,1.146176894713229,8.681501484663256 +215,1.2391984568734045,8.568405824674697 +216,-3.540082501244764,-7.192348313166246 +217,-7.532863915593086,-9.511638351944974 +218,-8.250126393231623,-8.764419022184814 +219,-7.482088206544333,-7.577130499788636 +220,-4.212302917624514,-5.289820445745089 +221,-8.261546908364274,-7.772718494251158 +222,-4.544762612440516,-4.842522228687262 +223,1.1387722979236394,9.192847076109373 +224,1.3535929181464974,8.647364618306227 +225,-3.612878181498939,-8.699470128470956 +226,-3.148044440176802,-5.1170201801674455 +227,1.354926291903442,9.499117985770674 +228,-8.65829008811044,-6.307401366393167 +229,1.2316993801929832,8.222465907459812 +230,-8.331872488428887,-4.853396673925863 +231,-9.100328499986468,-8.573730838333006 +232,-3.107786079685531,-6.1718712105409255 +233,3.2652035789984177,8.128662224265318 +234,-10.552602581625532,-6.761345255592396 +235,-8.907554671210626,-8.577143103377015 +236,1.4855632154992677,7.874244483310497 +237,-6.027901651287577,-6.787145221246898 +238,0.5556693033797273,11.480362416240872 +239,2.098624352712322,9.814242943395692 +240,-9.887555426056213,-6.710976909450727 +241,0.8649055384285145,9.097109122257002 +242,-11.078196900981547,-7.984561032228856 +243,2.316337179364928,8.598986846679601 +244,1.2640011485194367,8.82879738781108 +245,3.0267554273654462,6.784623431027779 +246,3.4220828134636765,8.502220645797342 +247,-2.4494211129155765,-4.999817909826381 +248,1.0421080438586254,8.824880864245943 +249,-8.051030898506728,-7.744423230554592 +250,-10.116651692250501,-8.275211300531558 +251,-8.202749106955345,-6.811801129305094 
+252,-8.967894951789217,-8.509902072896494 +253,0.488437835066031,9.44417968077311 +254,-0.25003681933843014,8.865455583137448 +255,-9.585236818018899,-6.645739396359231 +256,0.9766413557554834,11.752064783270438 +257,-9.034868112790015,-7.374528147394214 +258,2.5822045654186043,9.102988870451187 +259,-2.0778811776069945,-5.786283877066672 +260,1.5787413115793543,9.044736891113242 +261,-2.33485326917092,-6.061955751331998 +262,-3.2895770824811423,-5.034392291746466 +263,-10.081565262534639,-7.374197032305851 +264,-3.994431608464796,-6.0162453845407935 +265,-4.232758796887437,-6.426632957640464 +266,-10.498691473054258,-6.403791006064546 +267,-2.292209986812295,-7.250831946240652 +268,-10.76508425595502,-7.165069717499894 +269,-8.386687189897946,-8.769216122642224 +270,-5.27510041598469,-7.945133906351278 +271,-2.334510993460131,-7.3989680996483465 +272,1.284797722990889,9.064382388513113 +273,-6.168646964600059,-6.8219172657965785 +274,-3.92701169853442,-6.704196762275968 +275,-2.6619749106971726,-6.704202485296508 +276,1.1478911738167819,7.512416897879306 +277,3.2510022512727446,7.3539410653768025 +278,-7.315315871979378,-5.88246686985582 +279,2.1117845497483563,11.076505278766518 +280,3.639056472297903,9.467060612350386 +281,-4.166374993413476,-6.431310496109624 +282,-8.726577750428884,-7.345021586443768 +283,1.6433220828690402,8.696077338068736 +284,1.8838280865620047,9.738013169059982 +285,-7.629361397843038,-7.55931022100682 +286,-7.912190771005928,-6.98790548068395 +287,-9.412741193816466,-8.791628726681072 +288,-9.74190411151045,-4.956844114033405 +289,-8.35965254228762,-7.669920828853046 +290,-2.801288589366356,-5.399769916668381 +291,0.2601192833910215,10.467333876493818 +292,-11.025210031015783,-8.136674217017307 +293,-0.06674706099380368,10.2899750307263 +294,1.067776664155953,8.750462573983851 +295,0.5509278352071858,10.273515136192767 +296,-9.884243025797673,-9.28367482506358 +297,-7.872243069788768,-7.757767318695286 
+298,0.8084074017233095,9.656234963082685 +299,0.7624874038465008,8.947917897098035 +300,-6.332133121004648,-6.785874331813723 +301,-4.429652056501128,-7.578014459631968 +302,-4.188584113595973,-4.934755810797555 +303,3.6791257547345637,8.31690127193822 +304,1.51966280310663,9.29788229347068 +305,3.2102902579616233,8.295945923007892 +306,-11.779303109403571,-7.6837457438194265 +307,-9.216883150382246,-7.163067385221396 +308,-4.249121038157665,-6.5463368597678535 +309,1.6312431011720003,8.94177175465414 +310,-9.06559350244003,-6.148320200131417 +311,-10.015959896487981,-7.709434635038423 +312,-7.173319331442961,-9.256992837135826 +313,-9.060920902092729,-7.303562314018536 +314,2.312700300664332,7.139243673442236 +315,1.5809938429950523,10.007422841453208 +316,-8.54445075456971,-8.41948970598796 +317,-8.965879693065672,-8.447110955650984 +318,-9.54302474414816,-7.420209367781994 +319,1.8744747391345913,7.97319136754416 +320,-3.9052002880555103,-8.238735799274453 +321,1.310281830658,8.228855547316034 +322,-8.64301324545217,-7.027923448109183 +323,2.805369886850809,8.842471053078325 +324,-8.649270035579416,-6.535989617502658 +325,-3.39049806833454,-8.950043708641422 +326,-4.684398595708872,-7.537100814540029 +327,-2.7612003380209966,-5.035406026117489 +328,-10.13877327966241,-8.114347510336723 +329,-6.8544350435639885,-10.03456862183058 +330,-3.4750669023257132,-6.82654679089782 +331,-9.633501263903536,-9.055562467014843 +332,0.9048609502183895,11.673092427259427 +333,-4.2996488273011675,-6.010824368610892 +334,-8.771337671565963,-7.995483617420683 +335,1.6839223227818167,9.237547972212987 +336,-2.671508715890748,-6.069009146147523 +337,-4.397948091647258,-6.587398168180368 +338,-7.139508129041156,-7.821464761648635 +339,-9.475778540263686,-9.243996319844927 +340,-9.959314284171004,-7.285005944638585 +341,-7.839385017571539,-7.0330905675258295 +342,-7.868893904638215,-8.353276539871395 +343,1.3211888119472888,8.412687232783561 +344,2.6800131248732386,8.32815336253785 
+345,-6.857701664404774,-6.452673448184665 +346,3.713139711261232,8.725335269085823 +347,-9.107446210080008,-7.921545298732012 +348,1.5893755906533062,9.98187640608331 +349,-10.159372029770498,-8.819216585856166 +350,1.4574743605281437,8.149245911727022 +351,-8.871582388403974,-8.559470119720018 +352,2.2751058610991945,9.901232247753237 +353,-2.7472542355419822,-6.124315203745871 +354,-2.656511821980372,-7.568181529571761 +355,3.0291019009630196,8.963009764693098 +356,-4.161637206516404,-6.81038562883428 +357,2.4730627290950755,9.091190792250169 +358,1.1200833219060131,7.833226207919465 +359,-9.070217965415747,-8.54140715137701 +360,1.0982393965124597,10.043973011835417 +361,-4.703066488892138,-5.423187161551743 +362,-3.357780102370316,-7.157938971939569 +363,-3.8242194656559905,-7.918767533053278 +364,1.1317031057022624,7.430132519964122 +365,-3.1464889947369326,-6.132635881498026 +366,-3.58162916465379,-6.68711506421579 +367,1.1996135191507407,9.006723457478385 +368,-9.45763051539227,-6.400190829645651 +369,-9.014623623797148,-6.574091480997232 +370,2.333568958298026,7.5107128468109 +371,-9.268111132437175,-7.69591073961429 +372,2.5920778736471224,6.5826104754205 +373,-1.225882906950281,-5.666669443257231 +374,-8.325853476844289,-7.793387810859259 +375,0.5055059451451487,8.966183931370773 +376,-4.978226962396014,-6.541022557172996 +377,-8.612561840486487,-5.163326887625027 +378,-9.271682378408538,-7.475451620888086 +379,-8.27293191600384,-6.776477142175483 +380,-9.006055108715797,-7.844080334639579 +381,-3.5485851032861455,-7.428944065491923 +382,-10.686134323492006,-9.763820650368935 +383,-3.0800097299068763,-8.15854472578761 +384,-4.255691548792045,-8.38292415298679 +385,-3.307254052685989,-7.470134723775772 +386,-9.321496507840077,-6.842675169149333 +387,-4.7321768780160465,-6.822753330631313 +388,2.230652092511753,9.209909160266504 +389,-10.199387433348697,-7.21773120699181 +390,-3.4127519199251655,-5.185028443964861 
+391,-3.8416188751016542,-7.533373329866099 +392,2.1132609712972874,9.020452298425571 +393,-0.5325780055907829,8.484331396616149 +394,1.8445158417826104,8.81605756196587 +395,2.3584336367199685,9.97789937430074 +396,-3.568501887826641,-5.088851488135153 +397,-9.568684348087837,-5.253772154590987 +398,1.6852283040068348,9.30206036715821 +399,2.1342159865596315,8.910948360147488 +400,-6.629342939316992,-7.850706578446751 +401,2.0937767705468238,10.684273022018974 +402,-9.676217713369718,-7.313906906384972 +403,1.9570383690447184,10.003520022659853 +404,-8.939965060567271,-7.898097303618171 +405,1.4808132185460936,7.999137784006351 +406,2.3194719866422937,9.036137023298723 +407,-8.835902135605371,-7.052629529645275 +408,-8.238054814755314,-9.249598787232223 +409,-4.660901299028566,-7.706916900944542 +410,0.3783309638984087,8.290738393471463 +411,-5.015776514744971,-6.519066632817204 +412,-8.421053394915008,-8.736718539801242 +413,-4.435649194270888,-6.696459121403307 +414,2.247926822227134,9.57329133500239 +415,2.0222130431499963,6.975811120753269 +416,-10.306964876414497,-8.736415116583338 +417,-9.709449108929526,-5.153622128090238 +418,-4.017799127595082,-6.995636771418465 +419,-10.076959087536602,-6.21256389676847 +420,2.659238286258088,8.806459088474094 +421,-3.548608592940656,-6.236718753216755 +422,-4.62073236673274,-5.675859560710455 +423,1.5425505794868228,10.321222555804594 +424,-4.704008819633645,-6.048358572352397 +425,-4.379166498786982,-6.027328721967298 +426,-2.1851097524355154,-5.4972225996114386 +427,-1.5981643079912309,-7.268515428709705 +428,2.6828651920893583,7.562270572067991 +429,-5.0291400786305225,-6.489316980749896 +430,-8.90568480456124,-7.233692326868969 +431,-5.004939817646501,-6.66263935533808 +432,-3.710438396645244,-7.47145550649408 +433,-8.431518653354582,-6.547806268928545 +434,-4.961399314441151,-5.517757584761213 +435,-4.424875928292114,-4.935088672171116 +436,-8.465630223843888,-7.526658276960419 
+437,1.1232273055419215,8.848543927546967 +438,1.6596342752172086,9.141054895640758 +439,0.7119140054568496,8.724208673911308 +440,1.0450609455563007,10.255952934802293 +441,-4.467548041647252,-5.754908323737896 +442,0.7968199534070038,9.810017983913482 +443,-4.635062452279791,-6.319914205163661 +444,1.1557559471601326,9.393630939514255 +445,-7.2189693831757324,-8.996252323908598 +446,-4.04121798967872,-5.9680578737510555 +447,-9.280259016217148,-7.344296039493729 +448,0.12990583915647091,9.534366746563984 +449,-9.080348829541355,-6.177346713521466 +450,-4.122544048604146,-7.424318232165486 +451,-8.523822110731878,-7.273222957182741 +452,2.4144759026265117,10.149542222925607 +453,-4.264806434701379,-7.993804495829595 +454,-8.843006328078824,-7.25237245189648 +455,-7.49037786278734,-7.149099301981595 +456,-5.072992400771182,-7.044488347703606 +457,-8.984565831557692,-7.556584668377714 +458,2.557179024476171,7.829957672041701 +459,-8.734624814022123,-7.572386626294366 +460,1.2351440435139123,9.43909831047302 +461,-3.3594653020514693,-8.995232214949041 +462,-9.701004421599954,-8.024999268149845 +463,-3.911609241339809,-6.404433416805523 +464,-2.643065906781685,-7.153288162400494 +465,-4.362786615686318,-8.81772389317671 +466,1.5120081296131147,9.911223821403949 +467,-8.531551914301003,-10.090011710251812 +468,-6.023729176014786,-7.112820404050528 +469,-2.5985519868592517,-6.4999945557865235 +470,-8.83500699255377,-7.718829054623505 +471,-5.75919608049208,-8.762533328160302 +472,-4.156775252969363,-5.923221304091207 +473,-0.382966756977273,6.909940417424705 +474,-3.7143796750446025,-5.330378058802522 +475,-4.931516866558665,-6.9273383758445854 +476,-8.192359305960302,-7.966452982690816 +477,-3.725534195535071,-6.923575094320778 +478,-1.6567493690717532,8.068007354960876 +479,-2.146889726406772,-5.438731120358826 +480,1.6640559067378071,9.675773158258412 +481,0.18778248663288855,8.428880250673016 +482,1.2712054384816953,9.5936730208319 
+483,-9.777477551079443,-7.756277642160135 +484,1.2220274964294986,7.937710345285722 +485,-9.798771689602779,-7.310089107176487 +486,-2.5769948606619275,-5.236907832855168 +487,-3.280039956630294,-5.921645113967604 +488,-10.385438473456237,-8.978699614287862 +489,-3.538238933516765,-7.776971810237942 +490,-2.370442677763312,-7.492491016579457 +491,-9.22774914798459,-8.210795315347276 +492,-3.5970918987451044,-6.287436181495865 +493,-8.517183803198076,-6.662349844790031 +494,-8.287708904391959,-8.15649593679152 +495,-9.135473938940752,-7.855739524229387 +496,-8.669299782592173,-8.170797467143105 +497,-10.005994810578429,-7.936401567132531 +498,-8.544223415826707,-7.8982358071396135 +499,1.380211293463183,7.933133954036769 +500,-7.9655997110381795,-6.941204946690403 +501,-3.465161569489663,-6.077641920307408 +502,1.83638322311871,8.903049115882464 +503,4.120788211597215,9.110648038878697 +504,-2.794003193070597,-6.345799773867606 +505,-3.8239238176980557,-9.107238271082618 +506,0.7875933191763557,11.184572255529599 +507,-9.252786356666299,-6.843383729987514 +508,-7.998189201610247,-8.415058408832383 +509,-8.16094072563349,-7.583334007895792 +510,-11.05487333596034,-7.736249582486785 +511,1.8437259315731145,8.988020831895229 +512,-8.699105002659575,-5.307239816191379 +513,-2.466750766070521,-6.512870969238753 +514,3.410874103126626,7.867497335793834 +515,-9.752795331013969,-8.67435328723578 +516,-8.466944781470925,-6.791226698236835 +517,-10.616759196450891,-6.880881334143791 +518,0.43176792520881224,9.550687380593555 +519,3.736985587600425,8.853374927229641 +520,-4.532165464595547,-7.339471392762515 +521,-4.0812752157406305,-4.625339795130051 +522,-2.834654789895266,-5.783233762871275 +523,3.1267617202318103,8.886048721823393 +524,2.5997957103384843,9.835592180820424 +525,-2.8382644386960774,-6.663377478979936 +526,-4.883073540738554,-4.148753193485065 +527,-4.441561297964279,-4.69256538859622 +528,-0.3798796161990645,8.96320635596488 
+529,-3.7109403060725477,-8.147828020606191 +530,-10.452756650029135,-8.807450425963179 +531,-9.820558778263809,-7.31072935827564 +532,-0.42268912766815525,8.631901499535282 +533,-4.093024424548601,-6.675411350292116 +534,-4.254640033558407,-6.478024511999712 +535,3.0761166422810886,11.077524125411133 +536,2.25594594426518,9.071468737291259 +537,-10.114562377154575,-6.719087017466592 +538,3.205132879039055,7.497445881908755 +539,-10.267700148068077,-6.713722100232483 +540,-10.417438485978314,-8.465832867634974 +541,-2.5164847102881867,-5.242531699126268 +542,-2.698794657384064,-6.103109872551561 +543,3.8165342813345537,8.841505234091182 +544,-10.227608589045904,-8.195906924722983 +545,-3.663699867237535,-8.27674770897949 +546,-3.261706481959633,-6.159224936936016 +547,-4.334510759437645,-5.956654161389583 +548,0.6999551100635917,9.42591422972624 +549,-9.151125616059769,-7.0551031934317905 +550,-9.223902039701933,-6.860062637838487 +551,-7.4615598493100865,-8.528019264479758 +552,-2.4380734875462897,-7.507277903677772 +553,-4.026642611221013,-5.110849404862844 +554,-9.389621272355004,-9.397979778210841 +555,1.7150238377255527,9.620244522910358 +556,-11.309469090090785,-7.9345599863656355 +557,-1.508483049723969,-5.969192439095569 +558,1.5454178016331435,9.049535083232207 +559,0.6647835495149763,10.331377822257668 +560,-10.500536041382844,-8.146345516551253 +561,-3.9299577253687468,-6.9635497993940065 +562,0.8411312844844993,8.368249720855683 +563,2.9506985987058885,10.331232109962894 +564,1.0316182325791265,8.770754440676154 +565,-4.312763375043856,-7.412307169564694 +566,3.1563371086285135,10.395086485340384 +567,2.002138375134201,8.547140346608217 +568,1.6923235633247864,7.353260846660685 +569,2.473672440958805,9.136461866113027 +570,-10.40814549184069,-9.118682041635388 +571,-4.065550600052492,-6.905831922608004 +572,0.2922467951737049,9.546441508105172 +573,1.5891585983241838,8.18355792173308 +574,1.4248639028982657,9.728308665738702 
+575,-3.9669243990361025,-7.319350698393833 +576,-8.265092850484752,-7.42655563395339 +577,1.7895143670171882,7.819868806232385 +578,2.1336669469904095,9.948097835862201 +579,2.421396986118362,7.483100182629542 +580,-8.861580320724723,-5.474171763623955 +581,-8.90503421798681,-7.311777925305353 +582,-8.436633589948826,-8.523758539750995 +583,-5.049611580771868,-7.364445627614865 +584,0.7073023937231189,6.509679270754108 +585,1.2552894767144336,6.843131169029357 +586,-4.003962548081533,-9.027134085227624 +587,-4.49821963772513,-7.656193217995114 +588,-4.4339247440343454,-5.309484171372889 +589,1.2819691769435653,7.412341364219434 +590,-4.450143227312109,-6.59708814346974 +591,-4.499151353997774,-4.982990326147527 +592,-9.965975298915485,-7.638169628639451 +593,2.7968100908721762,9.0415309589172 +594,-3.4947283373962827,-5.86149606270687 +595,0.8206306411514246,9.044253333258009 +596,-5.021684732671406,-6.862529076101957 +597,1.5626206680018402,7.537251089399232 +598,-4.731861244222261,-8.35463204807969 +599,0.02102828690014613,10.047689525469467 diff --git a/Libraries/oneDAL/daal4py_Distributed_Kmeans/data/distributed_data/daal4py_Distributed_Kmeans_4.csv b/Libraries/oneDAL/daal4py_Distributed_Kmeans/data/distributed_data/daal4py_Distributed_Kmeans_4.csv new file mode 100755 index 0000000000..98ee92ecea --- /dev/null +++ b/Libraries/oneDAL/daal4py_Distributed_Kmeans/data/distributed_data/daal4py_Distributed_Kmeans_4.csv @@ -0,0 +1,601 @@ +,0,1 +0,0.10332010330206987,7.433057527844319 +1,-11.457080958126577,-6.770419344202086 +2,1.3549596047187271,7.700376028699749 +3,3.778150756893532,8.684061546713968 +4,-7.383885961653566,-8.618036742404994 +5,-0.39730229489662605,9.938300708523482 +6,0.5160441373354219,8.083982733069245 +7,-4.40642837789918,-4.6250083216575355 +8,-2.021319778039908,-7.629105587345337 +9,-8.69643931304748,-8.853871367088468 +10,-2.850870377910395,-7.749521305500871 +11,-3.8889488635523684,-7.407902098242759 
+12,1.7829979167708168,10.567535943918909 +13,-3.519846055844178,-6.176139856637121 +14,1.5542403836477212,9.658471608302795 +15,0.4492306834259767,8.865118438070576 +16,-8.931885192609018,-6.637687003560089 +17,0.9532438492185421,8.939440062166604 +18,-4.732335952802864,-6.611045721670156 +19,1.2625408752904765,9.241926407970649 +20,-9.07086649044261,-8.134895879104807 +21,-8.036931586085002,-9.175845150648595 +22,3.11033574236728,9.120552071901347 +23,2.1275057983269905,8.28756053994324 +24,-2.3684112074313166,-5.886028660737004 +25,-8.964985592313594,-9.366382464393025 +26,-2.803686561279893,-8.069423213817318 +27,0.9129238925859261,8.650125187645022 +28,-11.08759019781802,-8.723907914360367 +29,-9.27887309508911,-6.765499402144199 +30,-4.765684514060551,-6.551347862368417 +31,-9.930360118635935,-7.083002016140143 +32,-8.211276870786092,-8.929597262859707 +33,0.8393549155970421,10.413232778704002 +34,1.486733016242737,7.202634962739694 +35,3.0575774682065946,7.482802373308799 +36,-8.564323295750087,-8.716844652132215 +37,-9.55725601240105,-7.310207215679147 +38,-8.901238507527413,-7.637897364610855 +39,-8.974367376520202,-8.55745587738768 +40,-4.320376872754786,-8.466550038397596 +41,-0.49258782785101607,10.716693498249228 +42,-4.6352245569022035,-5.336299582614836 +43,-9.099679594367988,-8.485814462290483 +44,-9.714631488909273,-5.22939458647355 +45,1.6501597558146126,8.430373326754374 +46,2.1405083621048875,9.1996702543802 +47,-5.149397117223147,-5.125656239754709 +48,-8.754121162974524,-9.53297997717857 +49,-4.175216584819035,-4.824835319920182 +50,-11.032739833239267,-6.190047430065487 +51,-3.3437310422037836,-4.953461730138913 +52,-4.903394237228955,-5.637326752057649 +53,0.635594613159898,7.2205676497228515 +54,-2.285295507751895,-5.067705899188625 +55,1.6287755069412115,8.090862721219649 +56,-8.488947479715254,-8.203791664131273 +57,1.6046322122303396,8.581881077986456 +58,-2.7451963356907845,-5.528097876400736 +59,1.3000010418976555,8.25061178560787 
+60,-8.575068374111153,-7.743158262247462 +61,-3.0032342702015367,-6.879693561855583 +62,-2.3903311387601063,-6.617410057610613 +63,1.4914864078134888,8.385099919504881 +64,1.1475566710983691,9.255035510930956 +65,1.7011767872758812,9.468439392758494 +66,-3.723267948794898,-4.5334697958159484 +67,1.9338982595865435,9.075252553317402 +68,-9.454275646601058,-8.146236472154563 +69,-8.614498108394772,-7.930028618297809 +70,3.3058229783134343,8.985672226433286 +71,-7.453263421868005,-9.589499666243544 +72,-9.051131862237169,-9.040052762503546 +73,-8.332827063749622,-7.309261538789444 +74,-4.182313849871069,-5.646287098804299 +75,-9.548574515271493,-8.321835170547535 +76,2.7279018461305022,7.883227926925386 +77,-9.169419979477773,-8.179717957397253 +78,-2.9137491598286718,-4.946211982196632 +79,-4.576965064541776,-6.648301200185011 +80,-8.396218896667257,-6.182254078938222 +81,-4.064800602702525,-6.496690345077638 +82,-4.292125187261824,-3.8110544249712213 +83,1.080149538643875,8.208812212381472 +84,-8.14636036309612,-8.127046141511695 +85,-9.72981639348356,-6.485715224054457 +86,-9.9914366518976,-7.251547617157808 +87,2.430874145068557,9.622705735890477 +88,1.3088331161191342,8.273567356619495 +89,-9.552102393402203,-9.242234279567885 +90,-0.05324114522390522,8.161757992806596 +91,-8.939521887926572,-6.698579044615198 +92,1.2400373350855958,10.660273458611586 +93,1.0170328792607584,8.446784945275587 +94,2.8433776388033603,9.57957504440497 +95,1.9226053681179893,9.619604157171334 +96,-2.822978961795278,-6.577214478461163 +97,-0.744358881914156,9.602600862844417 +98,-3.2159513863608074,-6.205447638523938 +99,-3.3613726180816954,-6.015048326689677 +100,-1.028033098050419,-6.143185893865859 +101,-4.514068308320062,-8.77832980959645 +102,-10.621848014522701,-9.208719953912157 +103,-4.114696761001476,-5.8474981315029275 +104,1.4620500318015608,8.251448431036307 +105,-8.716980262412852,-7.401842999545523 +106,-10.480874347578267,-8.006256173493918 
+107,-8.278854397415845,-6.956830111630074 +108,-3.301118306039926,-7.476786197915821 +109,2.3171065628512504,8.212345073816197 +110,1.710303138927003,8.813298496766798 +111,1.7608999943759904,8.196426898269944 +112,-9.049000425789696,-7.445395642780055 +113,3.3578958689069687,8.391684342227013 +114,-5.257799254544944,-6.640631405280244 +115,-10.243977422361972,-7.510371347855795 +116,-4.550134137239694,-5.856090796346537 +117,-3.7054788079266467,-6.680665785379875 +118,-5.099719716889362,-8.281616138774524 +119,-3.3630255444771007,-7.8152323872476686 +120,-5.053107949784717,-5.714018343138573 +121,-4.996014479508486,-6.867452649832137 +122,-9.79087311549495,-7.990696121215509 +123,-9.894952432863954,-5.919664972952979 +124,-7.998077960678973,-7.466145043480028 +125,-7.626417104394374,-7.8970534800882035 +126,-4.1881383908819405,-6.939057961967295 +127,2.34960172025711,7.739116063181076 +128,-3.1417708078401354,-7.629837732353748 +129,0.9289305865777172,9.988033988591333 +130,-2.164421767072346,-7.50649207969462 +131,0.41853000838584387,10.110931422144121 +132,0.7400807311324775,8.508373501577271 +133,-1.1364834746249484,-7.709871462628085 +134,-3.718475171665707,-7.322498643982633 +135,-8.56665079533079,-6.186519160575655 +136,2.747364757047606,8.91244845395835 +137,-3.7208995875130535,-5.025008292927113 +138,0.7392223115055112,9.315925989714465 +139,-1.3087755832537313,9.014376965679181 +140,-8.582365743135174,-7.856649895105258 +141,0.7993005612885375,7.917111342530812 +142,1.6563928814647313,9.617357563577665 +143,2.085824386959823,9.599755735558062 +144,1.5013443722140052,10.722417673062296 +145,2.1754135426933945,8.720714517175313 +146,2.3588382752863053,8.614914851805098 +147,1.211438801211038,8.42677120938315 +148,-1.5745761586324107,-7.183785235449116 +149,-2.964766675561709,-7.489589496345692 +150,-8.910820704195393,-6.919512593396113 +151,-8.9816280693271,-7.287428022339447 +152,1.6571173127857766,9.234840296892475 
+153,-8.924784980289425,-8.025684926024711 +154,2.2686724726124075,8.308204858154415 +155,-8.200048485170736,-5.932219763863781 +156,1.8919238459888035,8.969986201271237 +157,-2.441642321765225,-7.100833966853025 +158,-7.033715194515465,-6.970109573816821 +159,-9.773935882768308,-5.724722764925062 +160,-8.601410625703446,-6.949078220901538 +161,2.5210681149799514,10.061607818392295 +162,-9.127335669111101,-8.3008763692738 +163,-9.709771657142020,-7.444212662130526 +164,-2.5700986302088538,-6.987966188150011 +165,-0.7702213291329225,7.574082930390704 +166,-4.4719799493922086,-5.409031535461088 +167,-3.750631879920109,-6.200685420473867 +168,-2.5302437103150535,-7.25712894315796 +169,0.8481158875951356,8.36938393164685 +170,0.18493531626255533,8.26694017721234 +171,-2.977230529674405,-6.680446391670111 +172,-5.882850122238738,-5.492789105806717 +173,2.296876263463775,10.533791321000571 +174,-4.157111089052275,-6.271131973108935 +175,-7.648711768400524,-7.5250019570491835 +176,-3.052565904860986,-7.950607586383728 +177,-0.11348035149120594,9.720185255137826 +178,-2.8593918126369857,-6.419098005011792 +179,-3.1105485621828706,-6.552547464236643 +180,1.6544164300532487,8.585517480127624 +181,-4.4613354967678065,-5.356868999473551 +182,-4.269920276314392,-5.279118977120863 +183,-1.9557780319538798,-5.854154911905707 +184,-9.498374262961063,-8.736327789643028 +185,-3.577803410517134,-6.3192637115085875 +186,-3.597871160549303,-6.181116722975659 +187,-4.177092821608165,-6.056579511534875 +188,-8.111083896430419,-8.277691411511093 +189,2.152045243993091,11.373438292614141 +190,1.0155641264093118,8.156438295352316 +191,-9.924268070310191,-6.630830852887261 +192,3.18360240206096,9.56007164758904 +193,-7.83078901744347,-7.692154085752587 +194,-3.6288843509443485,-7.329206450334352 +195,-4.469054874961147,-5.30409106500506 +196,-9.348956010813758,-7.344898106201228 +197,1.3866028257974798,9.45263507444789 +198,-3.2655886263343787,-7.534884911872669 
+199,-4.099344992744675,-7.242477596850821 +200,2.109585403235858,8.082138286177194 +201,-2.6189700083390726,-4.293503587445704 +202,-0.5242851270504048,10.905414559024496 +203,-4.004655195359291,-5.475853674307111 +204,-9.432373582735602,-9.362717304474161 +205,1.4790975677294698,8.71509103766059 +206,3.1125907560604875,8.195264328734265 +207,-1.4908037421015017,-3.467285706166175 +208,-4.998111987077401,-8.12384626655243 +209,-2.959755619948657,-5.261842477246686 +210,-3.4688681427114894,-6.781982463262677 +211,-3.0564543070606556,-5.284221521163483 +212,-3.534702754309033,-6.805496549497049 +213,-4.74454012304712,-6.741814055899147 +214,-7.700936300263158,-8.60269464330942 +215,1.1791405914420183,8.612798281538595 +216,-5.245417900815677,-7.799109362884348 +217,-4.709874141302528,-6.272734950183002 +218,-7.477604407582303,-6.86011311708189 +219,1.4217403567022842,7.568046564888566 +220,-4.192264218725802,-6.954960331045015 +221,1.9807052384963193,8.660580460372561 +222,-3.627977418159731,-5.704035537448408 +223,-3.6160280365046327,-6.791686350240313 +224,-8.783511942118828,-7.686120915673679 +225,-3.1308372171133203,-5.742420381991395 +226,-5.52144948144214,-9.003088074639763 +227,-5.486164657388887,-6.98593906653643 +228,-4.156474586100949,-8.87506003747911 +229,-8.667564519104046,-6.744364238166 +230,2.1607196671238063,8.706563384883532 +231,1.3910987282344514,8.438017350017722 +232,-1.7846284549350655,-6.262239014702814 +233,-2.854713779303287,-7.0976957664158205 +234,-4.273746686743,-6.853167487164358 +235,-10.61782516124727,-8.558369357907177 +236,1.2187745068862288,8.110594277503289 +237,-3.217943234654223,-7.6804732209951165 +238,-3.9982576778797805,-7.505720398808794 +239,-9.92166044372743,-6.380166184966641 +240,-2.1104312634549798,-5.98678030303904 +241,-9.11078914175272,-8.502898695567145 +242,-4.404724148057582,-7.027673783311267 +243,1.4046346166894423,8.160547966311894 +244,1.8085737289713733,8.74066220723949 
+245,0.28873537383188985,8.08140213929215 +246,-10.912806528631155,-7.739077680114303 +247,-10.467507832971958,-7.446586715066022 +248,2.859993801772723,8.238512486164346 +249,-3.3925299450589783,-7.214307850771746 +250,-4.741101632204018,-9.302406019691876 +251,0.6431995533037844,10.677337968545057 +252,-3.6834309946362778,-6.163620275639666 +253,-7.160588123680955,-7.999296011074655 +254,-8.406409357621815,-8.549109163011591 +255,-9.796003209558192,-7.779235768767059 +256,-7.785379338163239,-7.670928916501254 +257,2.402902221889004,9.178654547818025 +258,-3.9317733571520757,-6.888115412065522 +259,2.671849682572569,9.671854709207393 +260,-3.6831851093579417,-6.54062430384994 +261,-3.5530171441598974,-6.057034504236865 +262,-9.587474977945119,-6.5942830768698 +263,-8.331110781524744,-7.979672709454525 +264,-9.983737552654869,-7.020393931935233 +265,-4.877119889593441,-6.833803262685101 +266,-10.273359386544174,-9.152157876102272 +267,-10.462614802859681,-5.70811623533384 +268,2.3905941594302975,9.668424236419554 +269,-3.145674163416154,-6.925361659708869 +270,-8.772849374639128,-6.185526439282244 +271,-3.5401553464857303,-6.412623215630819 +272,-2.8308976895683333,-5.476156303082146 +273,-3.801975441372357,-8.17266581321063 +274,1.593478138989502,8.2283409057532 +275,1.1083736830525108,10.532946457515768 +276,-8.629021896198118,-7.288223300098911 +277,-4.244459466636461,-6.0044203274629355 +278,-8.358098042025627,-7.464441843191975 +279,0.5464259235576603,9.02654928587277 +280,3.2865148124029586,9.97054220915403 +281,1.6018509291466354,8.673197748734717 +282,1.4388645100714368,9.089227443346653 +283,-9.454027524648394,-6.637313223083979 +284,3.0590401094108373,7.858006030766688 +285,-7.526054504923103,-5.614511901600036 +286,1.2346751153152835,9.906582395613189 +287,-0.13016010738166028,9.36276717952036 +288,1.6353480069600483,8.883535422674692 +289,0.8501890864463089,8.704078528515916 +290,-3.6317772689427503,-6.04076375260842 
+291,-8.553679599109792,-6.568814403766624 +292,-9.477619958416945,-9.055024624943108 +293,2.035675808379171,10.653350117273046 +294,-9.337402974469537,-5.728922196268389 +295,-11.31124421531365,-7.05723670724196 +296,-9.80698762368018,-8.375912447746234 +297,3.7373452335031443,9.321632220693063 +298,-4.011483512512924,-4.674986311903249 +299,-8.209537548523056,-8.356640196985037 +300,1.4749171979243114,10.028770530826403 +301,-7.972809098669269,-8.12381123289332 +302,2.2334887410176107,8.895114750863513 +303,-7.739791719131931,-6.214711879386799 +304,-8.70596283786291,-8.938921350708586 +305,0.8703213918463835,9.763785265754917 +306,-8.919286182232902,-8.584764544460372 +307,-8.11023140480786,-7.273682228244324 +308,-9.401987062380314,-9.103042776307934 +309,-9.689268054510414,-7.539979581889578 +310,-8.126157836206835,-6.778596427706855 +311,-4.0088121043655764,-5.356961822843008 +312,-4.236986989126743,-5.101553477004014 +313,-3.9533622236031305,-6.584825956670251 +314,-10.712363186343154,-9.907687244750802 +315,-4.045834408112561,-7.579783105822965 +316,1.5182309232334954,8.2963837721969 +317,-8.550430783632796,-6.5497827981160075 +318,1.6743982864281177,8.67229649069847 +319,1.4673441728410779,8.500504684629885 +320,-5.496968720655763,-6.3815348615910406 +321,-3.4320350436309432,-6.824549120125649 +322,-2.246472391300614,-6.128247237773868 +323,3.425436359090328,8.763192875496575 +324,-3.9917769596693673,-5.975766005177232 +325,-6.1147581304556935,-6.190005472690292 +326,1.3619645984107212,10.892414824558076 +327,-11.040559642110845,-8.212690211351045 +328,-2.174036274319838,-5.558392044639329 +329,-1.6633785293624905,-6.265876809324602 +330,-8.30217337077351,-9.488909011978945 +331,-7.845110222680548,-6.891976649453214 +332,-9.367428022519887,-6.432481825948209 +333,1.2776367034010043,8.419682111069969 +334,0.9213062898601015,8.777678054509824 +335,-8.527920863931795,-5.531575951306733 +336,-4.2241669725178435,-6.792482455826352 
+337,1.7667613765414085,9.499948597371562 +338,2.175659723569063,9.344174814918937 +339,-3.5592061683626133,-6.675762326267785 +340,-3.5310059944137207,-6.483067194066182 +341,1.0749452601168323,8.650513425496522 +342,-5.143341686933297,-6.700188180597839 +343,1.1540938158736873,7.118490003220929 +344,-3.7099080347150095,-8.390944999854256 +345,-9.398266042099355,-7.595020573828141 +346,-3.32663089696943,-6.822234294782555 +347,0.06574279875473032,8.748278513754103 +348,2.8352533700966123,9.45075925759949 +349,3.5651458584407374,6.451249532135987 +350,-8.026546721361738,-6.381019100481007 +351,0.7519981107452905,8.300478310236869 +352,-0.7483423612814932,8.004091220777179 +353,1.347951038529854,9.43929152971695 +354,-3.4193954359078695,-6.467844026656739 +355,1.1573664822385257,8.80959176919287 +356,0.016123419703701014,10.230923549284014 +357,-8.443009865153714,-7.959899179528147 +358,-3.9249510992305585,-5.121258294863617 +359,-7.9397064008801275,-8.363044452201077 +360,-8.642116526299922,-5.006428787826313 +361,-4.145879952677833,-8.134904918495604 +362,-3.2457991193774727,-6.107013567899146 +363,-9.029744286835285,-6.036258976308376 +364,0.0065644783216172176,8.97018307243166 +365,-9.673086344121625,-6.591968603858424 +366,-3.759004993831228,-6.30231583740271 +367,-10.32317524410533,-7.5513698353789085 +368,2.4775183767038893,9.54338392728662 +369,-4.316155291672273,-5.196642968528018 +370,2.028997494940821,9.170247459695451 +371,-8.070898868532892,-7.460648283641237 +372,-10.061493844560447,-8.172322328081924 +373,2.304021158783734,8.351238594157417 +374,-4.900125772010646,-5.702253875931586 +375,-9.49837898421794,-9.207519030981446 +376,-3.5324222528590714,-7.587224174570254 +377,1.194306183664361,9.212586422860277 +378,-3.5369957622034076,-7.1052986574312955 +379,2.884330925702126,7.512347577176563 +380,-5.234794047090055,-9.855029455960205 +381,2.686699888092497,8.406337969272872 +382,1.4052712698315613,9.544165777014138 
+383,3.0401246351813898,8.980268002406419 +384,-9.40180266225954,-7.469832906343165 +385,0.4529517325667254,8.989794896740293 +386,2.8024351458748833,9.804051065392992 +387,-9.27559686944533,-6.929625232934977 +388,2.737937181321032,8.547815544851781 +389,-3.3677928967889055,-5.891101470343494 +390,-9.856978945720277,-7.346416050852512 +391,-3.13748303886494,-5.747958319194765 +392,-8.026813755262788,-8.718162563492122 +393,-9.485468757044574,-8.768424261724446 +394,1.6249405527760306,9.540571251809629 +395,-2.6063334783739194,-6.956578993790879 +396,-4.84985808744287,-6.770874009149754 +397,-4.525907372363473,-6.1069568389530104 +398,-4.723989289922966,-5.660033704993747 +399,-10.13349528069687,-7.633357736272925 +400,-3.3869513833525042,-6.080778240391612 +401,-3.758108955113767,-8.234590752370567 +402,-4.269975499999625,-5.67628904605929 +403,3.6302030738962983,9.261585481262125 +404,-4.907342450755322,-6.442265391426007 +405,-9.716354868345709,-6.832040088737648 +406,-9.62155214151273,-8.144201874844567 +407,-3.7954904093471575,-4.387877920118073 +408,1.0840211539446736,9.91191638500914 +409,1.8864266102008502,9.427349758224441 +410,-5.6090921969231475,-6.808196491662715 +411,-8.765111618596253,-7.341162120979236 +412,-8.423016337331168,-8.175796863871167 +413,-3.8033475331495126,-7.135895145037555 +414,1.194645243023539,9.237249135567533 +415,2.1350904063644798,8.808176512652985 +416,-8.502877013804621,-5.838240491795165 +417,-3.6439747332939834,-7.0526876957594045 +418,-9.157601295562374,-8.62908788389255 +419,-10.32462697151651,-8.240160867274547 +420,-3.1976209294393048,-6.766212622136209 +421,0.22636579468006368,9.782266383411494 +422,-6.071101186595537,-6.334559114232327 +423,-4.13424363559873,-7.2230560364136815 +424,2.8866308846997635,9.48403941471309 +425,1.631738769356994,9.049283963637885 +426,-7.711497872027216,-6.918915081058153 +427,1.5945344992892474,8.355559947118106 +428,1.6105302308281981,8.92465974100905 
+429,-8.900101678745584,-8.36971807214309 +430,-1.6751817498200121,-6.3104146250134825 +431,-2.894584684262049,-8.036841567603094 +432,1.796657968898645,7.948850883900677 +433,1.0478983997059044,9.363103162399927 +434,-2.8409450135907552,-6.171628297545874 +435,-2.227306347842321,-6.307491116034234 +436,1.6260369917293342,9.016427568754885 +437,-6.813430426503064,-7.644333527982396 +438,-4.462839299184512,-6.573060219011799 +439,0.1690909005821588,9.16580847024903 +440,-7.598677898814356,-7.0712033610174085 +441,-7.130545845445336,-8.707415522912568 +442,-8.934843243074333,-7.952078437140888 +443,-3.978099590488443,-6.628705569581859 +444,-2.0712704040859973,-5.601505609040887 +445,2.0107184297942395,8.780675845244515 +446,-3.612174159813057,-6.188615653153706 +447,2.210365580671883,8.016784178228079 +448,-4.714867081035019,-9.349071721468626 +449,-7.603898205496083,-8.606528934089987 +450,-4.060103843491486,-6.57141738920403 +451,3.0452962731722475,8.884983588793832 +452,1.392452429215557,9.793422837370947 +453,-9.709064683019628,-9.766253362570373 +454,-3.519895048653918,-4.50156998426432 +455,2.9581576907643146,9.90516142670879 +456,2.1006943315411624,10.852047777968103 +457,-3.740157806215068,-6.14909650979808 +458,-8.069207083924681,-6.857970269064291 +459,-1.599187821745982,-6.725930792173125 +460,-2.767838394170003,-7.581390024530235 +461,-4.883757201427826,-7.189390521384471 +462,1.6596127274734807,9.630361598946024 +463,-3.159475642662568,-6.692154385691226 +464,1.1998382168844066,9.75563664756965 +465,-9.38331058088836,-8.299734614250717 +466,-7.690211872752039,-6.338609710764013 +467,-7.595740118618205,-5.1514551068927075 +468,-8.453363823055831,-8.00455706914973 +469,3.4034179522453205,9.547176659378051 +470,-8.883897172481838,-7.043012176402381 +471,-9.24375628029471,-8.133943012104707 +472,3.030682510948817,6.618218462786886 +473,1.6003638054848441,8.658084851830768 +474,-8.155015798946494,-8.624187016721818 +475,-3.13659771045095,-7.28836362434932 
+476,-10.242449146559304,-6.961456415847736 +477,1.6054337578913696,8.810540316186987 +478,-9.635539446409513,-6.765951710036209 +479,-8.577805905732227,-8.667280072120896 +480,2.886215426011436,8.982921132883831 +481,0.7428492084039351,7.738771003865294 +482,-9.196553247815073,-7.92827867725508 +483,-3.693772722581926,-7.005408387193325 +484,-5.852953942291274,-7.250235565635251 +485,-4.0273949389934405,-7.509024768532071 +486,-3.5495371738813892,-5.651485732968955 +487,3.1324197020855764,9.624251667101094 +488,-5.361297037860611,-4.843864066191982 +489,2.387823375106472,7.855001277929019 +490,4.294189895452808,8.247457224926999 +491,3.4935052421884745,7.742016632282266 +492,3.295763345159723,10.196403866251892 +493,0.25522282062584756,8.797612288356916 +494,-9.174138183617176,-6.689011630188279 +495,2.363426941545468,7.950471612301264 +496,-3.288847817689608,-7.442175509767312 +497,-6.14653267940901,-7.829939896149226 +498,-8.740032954828624,-9.306254941130103 +499,-3.027879646045245,-7.217619315047918 +500,-9.090040975217521,-7.731935852212176 +501,-2.7474414193326173,-6.609617700465485 +502,-8.466691586974017,-8.723660649567059 +503,0.8745756609720133,10.299831979302303 +504,-5.569382978924867,-5.871387110342048 +505,-5.060035529778957,-5.8637954937466334 +506,-4.194768378212306,-6.191284911206974 +507,1.9108114255589896,9.341347244142318 +508,2.757968314057243,8.068380353373065 +509,-9.457255140119786,-6.82228772868949 +510,-10.103743972207798,-6.163900492820808 +511,1.3654520919423347,7.430361875030972 +512,-10.539029327653,-7.048406703897319 +513,-9.610832138636368,-7.866206040717246 +514,-8.86913950421189,-7.165287966918405 +515,4.454529071472935,10.930805747352025 +516,-7.228541203936608,-7.801934124727664 +517,-7.960814643910588,-8.799585007463984 +518,-7.150102452454222,-5.934653293322399 +519,2.225873749663514,9.426840907394965 +520,-9.959393056256307,-6.695374583702613 +521,-10.291965829260349,-5.930930881957479 
+522,2.2946809740902503,8.557590624852777 +523,-4.6837635934718325,-6.365077738131338 +524,-3.2203843310929776,-7.492467284541282 +525,-2.6251346193894634,-7.96861460076258 +526,-8.740940249070299,-8.725403642555193 +527,-3.2090921150933527,-4.150885397038803 +528,-8.095934459427891,-6.163405383251439 +529,-4.06640058664513,-7.59179736258314 +530,-8.986712346492256,-8.302193302573343 +531,-4.057358455778956,-6.376310776362724 +532,0.8597153790180284,9.971206639797476 +533,-10.238756492798004,-7.032909005136046 +534,-3.818802764142079,-7.117417145772932 +535,-9.387796060901183,-7.753538029825167 +536,-4.754370119268106,-7.41570935642935 +537,1.1377862575874307,7.858982216165167 +538,1.2033960393411534,8.370842929142466 +539,-6.682543227685962,-6.4354127077290535 +540,-4.376838486705878,-6.757929731118529 +541,1.4763144442194964,8.767287524041029 +542,-9.15903461658571,-7.743373236050885 +543,-4.635303453440217,-6.529589710059669 +544,3.807486502902972,7.601658904651514 +545,2.8230904569409025,8.751443168670802 +546,0.9299649702335456,8.830943517071482 +547,-8.691582658466626,-8.503413125974713 +548,-9.071621254951342,-8.710352942506473 +549,-4.049766712813901,-8.664499361565676 +550,1.8245463275173732,7.9761674099761635 +551,-3.3493823486024947,-7.493249748006302 +552,-6.989707010138683,-10.403483194439584 +553,-9.251915833519178,-7.366540058976776 +554,1.5493969049409129,6.616970097682397 +555,-8.387243825087705,-6.3584597244096575 +556,-5.419049608305913,-5.939452585406271 +557,-4.370029593497041,-5.91071909098837 +558,0.9312862912974271,8.829513721120074 +559,-3.4126342996730945,-6.793426480956431 +560,-4.297065951581121,-7.472319629223204 +561,1.1070493362854699,10.265920257932057 +562,-8.354902370372622,-8.759611704266433 +563,-1.1760744018566798,-7.368820263201566 +564,1.1412600947448923,7.2167488709494485 +565,-1.8406854826480157,-5.044577645545261 +566,1.0158097003284685,8.30291649430223 +567,-3.6295727926805164,-5.502430962352257 
+568,-7.8031522894756815,-9.506751304080428 +569,1.0041110581320907,8.750171253145108 +570,-9.319926024044026,-7.733415511117693 +571,-5.486645481531933,-8.83303062916075 +572,-4.603566875937983,-7.7984975677448585 +573,-3.972435031417204,-5.622934143284206 +574,-4.6359557675552665,-5.907135244908342 +575,-9.982127344096675,-7.327077425734391 +576,-2.1826412023412667,-5.718481255870376 +577,1.6076158887039014,10.035529414003625 +578,-9.657866194455266,-6.346791617600089 +579,-7.586898689929213,-8.124987571516584 +580,1.593291201280905,8.715554965102461 +581,-2.916782935354813,-7.121178500724829 +582,2.6068939877583848,9.700345198182983 +583,-10.83923115580838,-8.755033803749605 +584,-10.897319142176741,-6.967850097530078 +585,-2.35304395343788,-7.675821778854565 +586,-4.284376713730921,-5.014144131177996 +587,-9.540278288389061,-8.878692618031153 +588,-3.718801600771222,-6.645133456613008 +589,-3.906793398728766,-7.412486938254312 +590,1.325668478051277,8.589775939517128 +591,1.760049645315195,9.871884524484415 +592,-3.376000557366496,-6.414735971787719 +593,-3.3968410194594814,-7.496309738719732 +594,-0.8635316869591434,8.470488027768871 +595,3.212842131889252,7.617877682770626 +596,-4.112611607209468,-6.708665111654143 +597,-5.824710556376033,-6.001569780543621 +598,-4.596295359433397,-5.458264749394884 +599,-4.345974396894074,-7.470598519372723 diff --git a/Libraries/oneDAL/daal4py_Distributed_Kmeans/models/store_models_in_this_folder.txt b/Libraries/oneDAL/daal4py_Distributed_Kmeans/models/store_models_in_this_folder.txt new file mode 100755 index 0000000000..e69de29bb2 diff --git a/Libraries/oneDAL/daal4py_Distributed_Kmeans/results/store_results_in_this_folder.txt b/Libraries/oneDAL/daal4py_Distributed_Kmeans/results/store_results_in_this_folder.txt new file mode 100755 index 0000000000..e69de29bb2 diff --git a/Libraries/oneDAL/daal4py_Distributed_Kmeans/sample.json b/Libraries/oneDAL/daal4py_Distributed_Kmeans/sample.json new file mode 100755 index 
0000000000..ca263dac01 --- /dev/null +++ b/Libraries/oneDAL/daal4py_Distributed_Kmeans/sample.json @@ -0,0 +1,22 @@ +{ + "guid": "B69FAC86-88BF-41BD-B4E0-ACDF753ED3CE", + "name": "daal4py Distributed K-means", + "categories": ["Toolkit/Intel® AI Analytics Toolkit/oneDAL"], + "description": "This sample code shows how to train and predict with a distributed k-means model with the Intel Distribution of Python using the python API package daal4py for oneDAL", + "builder": ["cli"], + "languages": [{"python":{}}], + "dependencies": ["oneDAL"], + "os":["linux"], + "targetDevice": ["CPU"], + "ciTests": { + "linux": [ + { + "env": ["source /opt/intel/oneapi/setvars.sh --force", "source activate base"], + "id": "d4p_KM_Dist", + "steps": [ + "mpirun -n 4 python ./daal4py_Distributed_Kmeans.py" + ] + } + ] +} +} diff --git a/Libraries/oneDAL/daal4py_Distributed_LinearRegression/License.txt b/Libraries/oneDAL/daal4py_Distributed_LinearRegression/License.txt new file mode 100755 index 0000000000..a3ab05efce --- /dev/null +++ b/Libraries/oneDAL/daal4py_Distributed_LinearRegression/License.txt @@ -0,0 +1,8 @@ +Copyright Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +© 2020 GitHub, Inc. \ No newline at end of file diff --git a/Libraries/oneDAL/daal4py_Distributed_LinearRegression/README.md b/Libraries/oneDAL/daal4py_Distributed_LinearRegression/README.md new file mode 100755 index 0000000000..19a0633325 --- /dev/null +++ b/Libraries/oneDAL/daal4py_Distributed_LinearRegression/README.md @@ -0,0 +1,119 @@ +# daal4py Distributed Linear Regression +This sample code shows how to train and predict with a distributed linear regression model using the python API package daal4py for oneAPI Data Analytics Library. It assumes you have a working version of MPI library installed and it demonstrates how to use software products that can be found in the [Intel oneAPI Data Analytics Library](https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onedal.html) or [Intel AI Analytics Toolkit powered by oneAPI](https://software.intel.com/content/www/us/en/develop/tools/oneapi/ai-analytics-toolkit.html). + +| Optimized for | Description +| :--- | :--- +| OS | 64-bit Linux: Ubuntu 18.04 or higher, 64-bit Windows 10, macOS 10.14 or higher +| Hardware | Intel Atom® Processors; Intel® Core™ Processor Family; Intel® Xeon® Processor Family; Intel® Xeon® Scalable Performance Processor Family +| Software | oneDAL Software Library, Python version 2.7 or >= 3.6, conda-build version >= 3, C++ compiler with C++11 support, Pickle, Pandas, NumPy +| What you will learn | distributed oneDAL Linear Regression programming model for Intel CPU +| Time to complete | 5 minutes + +## Purpose + +daal4py is a simplified API to Intel® DAAL that allows for fast usage of the framework suited for Data Scientists or Machine Learning users. 
It is built to help provide an abstraction to Intel® DAAL for either direct usage or integration into one's own framework. + +In this sample you will run a distributed Linear Regression model with oneDAL daal4py library memory objects. You will also learn how to train a model and save the information to a file. + +## Key Implementation Details +This distributed linear regression sample code is implemented for CPU using the Python language. The example assumes you have daal4py and scikit-learn installed inside a conda environment, similar to what is delivered with the installation of the Intel(R) Distribution for Python as part of the [oneAPI AI Analytics Toolkit](https://software.intel.com/en-us/oneapi/ai-kit). + + +## Additional Requirements +You will need a working MPI library. We recommend using Intel(R) MPI, which is included in the [oneAPI HPC Toolkit](https://software.intel.com/en-us/oneapi/hpc-kit). + +## License +This code sample is licensed under the MIT license. + +## Building daal4py for CPU + +oneAPI Data Analytics Library is ready for use once you finish the Intel AI Analytics Toolkit installation, and have run the post installation script. + +You can refer to the oneAPI [main page](https://software.intel.com/en-us/oneapi) for toolkit installation, and the Toolkit [Getting Started Guide for Linux](https://software.intel.com/en-us/get-started-with-intel-oneapi-linux-get-started-with-the-intel-ai-analytics-toolkit) for post-installation steps and scripts. + + +### Activate conda environment With Root Access + +Please follow the Getting Started Guide steps (above) to set up your oneAPI environment with the setvars.sh script. Then navigate in a Linux shell to your oneAPI installation path, typically `~/intel/inteloneapi`. The Intel Python environment will be active by default.
However, if you activated another environment, you can return with the following command: + +#### On a Linux* System +``` +source activate base +``` + +### Activate conda environment Without Root Access (Optional) + +By default, the Intel AI Analytics toolkit is installed in the inteloneapi folder, which requires root privileges to manage it. If you would like to bypass using root access to manage your conda environment, then you can clone your desired conda environment using the following command: + +#### On a Linux* System +``` +conda create --name user_base --clone base +``` + +Then activate your conda environment with the following command: + +``` +source activate user_base +``` + +### Install Jupyter Notebook +``` +conda install jupyter nb_conda_kernels +``` + +#### View in Jupyter Notebook + +_Note: This distributed execution cannot be launched from the jupyter notebook version, but you can still view inside the notebook to follow the included write-up and description._ + +Launch Jupyter Notebook in the directory housing the code example + +``` +jupyter notebook +``` + +## Running the Sample + +### Running the Sample as a Python File + +When using daal4py for distributed memory systems, the command needed to execute the program should be executed in a bash shell. To execute this example, run the following command, where the number **4** is chosen as an example and means that it will run on **4 processes**: + +Run the Program + +`mpirun -n 4 python ./daal4py_Distributed_LinearRegression.py` + +The output of the script will be saved in the included models and results directories. + +_Note: This code sample focuses on how to use daal4py to do distributed ML computations on chunks of data. The `mpirun` command above will only run on a single local node. In order to launch on a cluster, you will need to create a host file on the master node among other steps.
The **TensorFlow_Multinode_Training_with_Horovod** code sample explains this process well._ + +##### Expected Printed Output (with similar numbers, printed 4 times): +``` + + +Here's our model: + + + NumberOfBetas: 15 + +NumberOfResponses: 1 + +InterceptFlag: False + +Beta: array( + [[ 0.00000000e+00 -3.20923431e-03 -1.06404233e-01 5.46052700e-02 + 2.86834741e-03 2.75997053e+00 -2.54371297e+00 5.52421949e+00 + 6.67604639e-04 -9.01293646e-01 1.96091421e-01 -7.50083536e-03 + -3.11567377e-01 1.58333298e-02 -4.62941338e-01]], + dtype=float64, shape=(1, 15)) + +NumberOfFeatures: 14 + +Here is one of our loaded model's features: + + [[ 0.00000000e+00 -3.20923431e-03 -1.06404233e-01 5.46052700e-02 + 2.86834741e-03 2.75997053e+00 -2.54371297e+00 5.52421949e+00 + 6.67604639e-04 -9.01293646e-01 1.96091421e-01 -7.50083536e-03 + -3.11567377e-01 1.58333298e-02 -4.62941338e-01]] +[CODE_SAMPLE_COMPLETED_SUCCESFULLY] + +``` + diff --git a/Libraries/oneDAL/daal4py_Distributed_LinearRegression/daal4py_Distributed_LinearRegression.ipynb b/Libraries/oneDAL/daal4py_Distributed_LinearRegression/daal4py_Distributed_LinearRegression.ipynb new file mode 100755 index 0000000000..ea51a22f11 --- /dev/null +++ b/Libraries/oneDAL/daal4py_Distributed_LinearRegression/daal4py_Distributed_LinearRegression.ipynb @@ -0,0 +1,285 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# =============================================================\n", + "# Copyright © 2020 Intel Corporation\n", + "# \n", + "# SPDX-License-Identifier: MIT\n", + "# =============================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Daal4py Linear Regression Example for Distributed Memory Systems [SPMD mode]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## IMPORTANT NOTICE\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When 
using daal4py for distributed memory systems, the command needed to execute the program should be **executed \n", + "in a bash shell**. In order to run this example, please download it as a .py file then run the following command (**the number 4 means that it will run on 4 processes**):" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "mpirun -n 4 python ./daal4py_Distributed_LinearRegression.py" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Importing and Organizing Data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this example we will be predicting **prices of houses in Boston** based on the features of each house.\n", + "\n", + "Let's start by **importing** all necessary data and packages." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "##### daal4py linear regression example for distributed memory systems [SPMD mode] #####\n", + "import daal4py as d4p\n", + "from sklearn.datasets import load_boston\n", + "from sklearn.model_selection import train_test_split\n", + "import pandas as pd\n", + "import numpy as np\n", + "import pickle" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's **load** in the dataset and **organize** it as necessary to work with our model. For distributed, every file has a unique ID.\n", + "\n", + "We will also **initialize the distribution engine**." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "d4p.daalinit() #initializes the distribution engine\n", + "\n", + "# organizing variables used in the model for prediction\n", + "# each process gets its own data\n", + "infile = \"./data/distributed_data/linear_regression_train_\" + str(d4p.my_procid()+1) + \".csv\"\n", + "\n", + "# read data\n", + "indep_data = pd.read_csv(infile).drop([\"target\"], axis=1) # house characteristics\n", + "dep_data = pd.read_csv(infile)[\"target\"] # house price" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training and Saving the Model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Time to **train our model** and look at the model's features! " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# training the model for prediction\n", + "train_result = d4p.linear_regression_training(distributed=True).compute(indep_data, dep_data)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To **get training model information** and **save it to a file**:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Here's our model:\n", + "\n", + "\n", + " NumberOfBetas: 15\n", + "\n", + "NumberOfResponses: 1\n", + "\n", + "InterceptFlag: False\n", + "\n", + "Beta: array(\n", + " [[ 0.00000000e+00 -1.68027665e-04 -7.40435666e-02 3.72706786e-02\n", + " -1.32246207e-01 5.24821226e+00 -2.09646770e+00 6.15919748e+00\n", + " -1.17193612e-03 -8.86515999e-01 2.23344092e-02 -1.09556173e-03\n", + " -4.40967972e-01 1.12216533e-02 -4.74953243e-01]],\n", + " dtype=float64, shape=(1, 15))\n", + "\n", + "NumberOfFeatures: 14 \n", + "\n" + ] + } + ], + "source": [ + "# retrieving and printing training model\n", + "model = 
train_result.model\n", + "print(\"Here's our model:\\n\\n\\n\",model , \"\\n\")\n", + "\n", + "model_filename = './models/daal4py_Distributed_LinearRegression_' + str(d4p.my_procid()+1) + '.sav'\n", + "\n", + "# saving model to a file\n", + "pickle.dump(model, open(model_filename, \"wb\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's **load up the model** and look at one of the model's features." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Here is one of our loaded model's features: \n", + "\n", + " [[ 0.00000000e+00 -1.68027665e-04 -7.40435666e-02 3.72706786e-02\n", + " -1.32246207e-01 5.24821226e+00 -2.09646770e+00 6.15919748e+00\n", + " -1.17193612e-03 -8.86515999e-01 2.23344092e-02 -1.09556173e-03\n", + " -4.40967972e-01 1.12216533e-02 -4.74953243e-01]]\n" + ] + } + ], + "source": [ + "# loading the training model from a file\n", + "loaded_model = pickle.load(open(model_filename, \"rb\"))\n", + "print(\"Here is one of our loaded model's features: \\n\\n\",loaded_model.Beta)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Making a Prediction and Saving the Results" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Time to **make a prediction!**" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# read test data\n", + "test_data = pd.read_csv(\"./data/distributed_data/linear_regression_test.csv\").drop([\"target\"], axis=1)\n", + "\n", + "# now predict using the model from the training above\n", + "predict_result = d4p.linear_regression_prediction().compute(test_data, train_result.model).prediction" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's **export the results to a CSV file**. 
We will also **stop the distribution engine.**" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[CODE_SAMPLE_COMPLETED_SUCCESFULLY]\n" + ] + } + ], + "source": [ + "# now export the results to a CSV file\n", + "results_filename = \"./results/daal4py_Distributed_LinearRegression_results\" + str(d4p.my_procid()+1) + \".csv\"\n", + "np.savetxt(results_filename, predict_result, delimiter = \",\")\n", + "\n", + "d4p.daalfini() # stops the distribution engine\n", + "print(\"[CODE_SAMPLE_COMPLETED_SUCCESFULLY]\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Libraries/oneDAL/daal4py_Distributed_LinearRegression/daal4py_Distributed_LinearRegression.py b/Libraries/oneDAL/daal4py_Distributed_LinearRegression/daal4py_Distributed_LinearRegression.py new file mode 100755 index 0000000000..820afbafa3 --- /dev/null +++ b/Libraries/oneDAL/daal4py_Distributed_LinearRegression/daal4py_Distributed_LinearRegression.py @@ -0,0 +1,122 @@ +#!/usr/bin/env python +# coding: utf-8 + +# In[1]: + + +''' +============================================================= +Copyright © 2020 Intel Corporation + +SPDX-License-Identifier: MIT +============================================================= +''' + +# # Daal4py Linear Regression Example for Distributed Memory Systems [SPMD mode] + +# ## IMPORTANT NOTICE +# + +# When using daal4py for distributed memory systems, the command needed to execute the program should be **executed +# in a bash shell**. 
In order to run this example, please download it as a .py file then run the following command (**the number 4 means that it will run on 4 processes**): + +# mpirun -n 4 python ./daal4py_Distributed_LinearRegression.py + +# ## Importing and Organizing Data + +# In this example we will be predicting **prices of houses in Boston** based on the features of each house. +# +# Let's start by **importing** all necessary data and packages. + +# In[2]: + + +##### daal4py linear regression example for distributed memory systems [SPMD mode] ##### +import daal4py as d4p +from sklearn.datasets import load_boston +from sklearn.model_selection import train_test_split +import pandas as pd +import numpy as np +import pickle + + +# Now let's **load** in the dataset and **organize** it as necessary to work with our model. For distributed, every file has a unique ID. +# +# We will also **initialize the distribution engine**. + +# In[3]: + + +d4p.daalinit() #initializes the distribution engine + +# organizing variables used in the model for prediction +# each process gets its own data +infile = "./data/distributed_data/linear_regression_train_" + str(d4p.my_procid()+1) + ".csv" + +# read data +indep_data = pd.read_csv(infile).drop(["target"], axis=1) # house characteristics +dep_data = pd.read_csv(infile)["target"] # house price + + +# ## Training and Saving the Model + +# Time to **train our model** and look at the model's features! 
+ +# In[4]: + + +# training the model for prediction +train_result = d4p.linear_regression_training(distributed=True).compute(indep_data, dep_data) + + +# To **get training model information** and **save it to a file**: + +# In[5]: + + +# retrieving and printing training model +model = train_result.model +print("Here's our model:\n\n\n",model , "\n") + +model_filename = './models/daal4py_Distributed_LinearRegression_' + str(d4p.my_procid()+1) + '.sav' + +# saving model to a file +pickle.dump(model, open(model_filename, "wb")) + + +# Now let's **load up the model** and look at one of the model's features. + +# In[6]: + + +# loading the training model from a file +loaded_model = pickle.load(open(model_filename, "rb")) +print("Here is one of our loaded model's features: \n\n",loaded_model.Beta) + + +# ## Making a Prediction and Saving the Results + +# Time to **make a prediction!** + +# In[9]: + + +# read test data +test_data = pd.read_csv("./data/distributed_data/linear_regression_test.csv").drop(["target"], axis=1) + +# now predict using the model from the training above +predict_result = d4p.linear_regression_prediction().compute(test_data, train_result.model).prediction + + +# Now let's **export the results to a CSV file**. 
We will also **stop the distribution engine.** + +# In[10]: + + +# now export the results to a CSV file +results_filename = "./results/daal4py_Distributed_LinearRegression_results" + str(d4p.my_procid()+1) + ".csv" +np.savetxt(results_filename, predict_result, delimiter = ",") + +d4p.daalfini() # stops the distribution engine +print('[CODE_SAMPLE_COMPLETED_SUCCESFULLY]') + diff --git a/Libraries/oneDAL/daal4py_Distributed_LinearRegression/data/distributed_data/linear_regression_test.csv b/Libraries/oneDAL/daal4py_Distributed_LinearRegression/data/distributed_data/linear_regression_test.csv new file mode 100755 index 0000000000..3f06e11969 --- /dev/null +++ b/Libraries/oneDAL/daal4py_Distributed_LinearRegression/data/distributed_data/linear_regression_test.csv @@ -0,0 +1,128 @@ +,0,1,2,3,4,5,6,7,8,9,10,11,12,target +357,3.8497,0.0,18.1,1.0,0.77,6.395,91.0,2.5052,24.0,666.0,20.2,391.34,13.27,21.7 +236,0.52058,0.0,6.2,1.0,0.507,6.631,76.5,4.148,8.0,307.0,17.4,388.45,9.54,25.1 +262,0.52014,20.0,3.97,0.0,0.647,8.398,91.5,2.2885,5.0,264.0,13.0,386.86,5.91,48.8 +168,2.3004,0.0,19.58,0.0,0.605,6.319,96.1,2.1,5.0,403.0,14.7,297.09,11.1,23.8 +102,0.22876,0.0,8.56,0.0,0.52,6.405,85.4,2.7147,5.0,384.0,20.9,70.8,10.63,18.6 +33,1.15172,0.0,8.14,0.0,0.538,5.701,95.0,3.7872,4.0,307.0,21.0,358.77,18.35,13.1 +475,6.39312,0.0,18.1,0.0,0.584,6.162,97.4,2.206,24.0,666.0,20.2,302.76,24.1,13.3 +376,15.288,0.0,18.1,0.0,0.671,6.649,93.3,1.3449,24.0,666.0,20.2,363.02,23.24,13.9 +285,0.01096,55.0,2.25,0.0,0.389,6.453,31.9,7.3073,1.0,300.0,15.3,394.72,8.23,22.0 +179,0.0578,0.0,2.46,0.0,0.488,6.98,58.4,2.829,3.0,193.0,17.8,396.9,5.04,37.2 +315,0.25356,0.0,9.9,0.0,0.544,5.705,77.7,3.945,4.0,304.0,18.4,396.42,11.5,16.2 +175,0.06664,0.0,4.05,0.0,0.51,6.546,33.1,3.1323,5.0,296.0,16.6,390.96,5.33,29.4 +223,0.6147,0.0,6.2,0.0,0.507,6.618,80.8,3.2721,8.0,307.0,17.4,396.9,7.6,30.1 +309,0.3494,0.0,9.9,0.0,0.544,5.972,76.7,3.1025,4.0,304.0,18.4,396.24,9.97,20.3 
+137,0.35233,0.0,21.89,0.0,0.624,6.454,98.4,1.8498,4.0,437.0,21.2,394.08,14.59,17.1 +226,0.38214,0.0,6.2,0.0,0.504,8.04,86.5,3.2157,8.0,307.0,17.4,387.38,3.13,37.6 +328,0.06617,0.0,3.24,0.0,0.46,5.868,25.8,5.2146,4.0,430.0,16.9,382.44,9.97,19.3 +416,10.8342,0.0,18.1,0.0,0.679,6.782,90.8,1.8195,24.0,666.0,20.2,21.57,25.79,7.5 +409,14.4383,0.0,18.1,0.0,0.597,6.852,100.0,1.4655,24.0,666.0,20.2,179.36,19.78,27.5 +84,0.05059,0.0,4.49,0.0,0.449,6.389,48.0,4.7794,3.0,247.0,18.5,396.9,9.62,23.9 +186,0.05602,0.0,2.46,0.0,0.488,7.831,53.6,3.1992,3.0,193.0,17.8,392.63,4.45,50.0 +133,0.32982,0.0,21.89,0.0,0.624,5.822,95.4,2.4699,4.0,437.0,21.2,388.69,15.03,18.4 +230,0.537,0.0,6.2,0.0,0.504,5.981,68.1,3.6715,8.0,307.0,17.4,378.35,11.65,24.3 +454,9.51363,0.0,18.1,0.0,0.713,6.728,94.1,2.4961,24.0,666.0,20.2,6.68,18.71,14.9 +7,0.14455,12.5,7.87,0.0,0.524,6.172,96.1,5.9505,5.0,311.0,15.2,396.9,19.15,27.1 +117,0.15098,0.0,10.01,0.0,0.547,6.021,82.6,2.7474,6.0,432.0,17.8,394.51,10.3,19.2 +214,0.28955,0.0,10.59,0.0,0.489,5.412,9.8,3.5875,4.0,277.0,18.6,348.93,29.55,23.7 +15,0.62739,0.0,8.14,0.0,0.538,5.834,56.5,4.4986,4.0,307.0,21.0,395.62,8.47,19.9 +136,0.32264,0.0,21.89,0.0,0.624,5.942,93.5,1.9669,4.0,437.0,21.2,378.25,16.9,17.4 +388,14.3337,0.0,18.1,0.0,0.7,4.88,100.0,1.5895,24.0,666.0,20.2,372.92,30.62,10.2 +178,0.06642,0.0,4.05,0.0,0.51,6.86,74.4,2.9153,5.0,296.0,16.6,391.27,6.92,29.9 +95,0.12204,0.0,2.89,0.0,0.445,6.625,57.8,3.4952,2.0,276.0,18.0,357.98,6.65,28.4 +495,0.17899,0.0,9.69,0.0,0.585,5.67,28.8,2.7986,6.0,391.0,19.2,393.29,17.6,23.1 +53,0.04981,21.0,5.64,0.0,0.439,5.998,21.4,6.8147,4.0,243.0,16.8,396.9,8.43,23.4 +131,1.19294,0.0,21.89,0.0,0.624,6.326,97.7,2.271,4.0,437.0,21.2,396.9,12.26,19.6 +392,11.5779,0.0,18.1,0.0,0.7,5.036,97.0,1.77,24.0,666.0,20.2,396.9,25.68,9.7 +25,0.84054,0.0,8.14,0.0,0.538,5.599,85.7,4.4546,4.0,307.0,21.0,303.42,16.51,13.9 +302,0.09266,34.0,6.09,0.0,0.433,6.495,18.4,5.4917,7.0,329.0,16.1,383.61,8.67,26.4 
+199,0.0315,95.0,1.47,0.0,0.403,6.975,15.3,7.6534,3.0,402.0,17.0,396.9,4.56,34.9 +210,0.17446,0.0,10.59,1.0,0.489,5.96,92.1,3.8771,4.0,277.0,18.6,393.25,17.27,21.7 +292,0.03615,80.0,4.95,0.0,0.411,6.63,23.4,5.1167,4.0,245.0,19.2,396.9,4.7,27.9 +290,0.03502,80.0,4.95,0.0,0.411,6.861,27.9,5.1167,4.0,245.0,19.2,396.9,3.33,28.5 +128,0.32543,0.0,21.89,0.0,0.624,6.431,98.8,1.8125,4.0,437.0,21.2,396.9,15.39,18.0 +489,0.18337,0.0,27.74,0.0,0.609,5.414,98.3,1.7554,4.0,711.0,20.1,344.05,23.97,7.0 +367,13.5222,0.0,18.1,0.0,0.631,3.863,100.0,1.5106,24.0,666.0,20.2,131.42,13.33,23.1 +122,0.09299,0.0,25.65,0.0,0.581,5.961,92.9,2.0869,2.0,188.0,19.1,378.09,17.93,20.5 +171,2.3139,0.0,19.58,0.0,0.605,5.88,97.3,2.3887,5.0,403.0,14.7,348.13,12.03,19.1 +405,67.9208,0.0,18.1,0.0,0.693,5.683,100.0,1.4254,24.0,666.0,20.2,384.97,22.98,5.0 +225,0.52693,0.0,6.2,0.0,0.504,8.725,83.0,2.8944,8.0,307.0,17.4,382.0,4.63,50.0 +228,0.29819,0.0,6.2,0.0,0.504,7.686,17.0,3.3751,8.0,307.0,17.4,377.51,3.92,46.7 +162,1.83377,0.0,19.58,1.0,0.605,7.802,98.2,2.0407,5.0,403.0,14.7,389.61,1.92,50.0 +394,13.3598,0.0,18.1,0.0,0.693,5.887,94.7,1.7821,24.0,666.0,20.2,396.9,16.35,12.7 +461,3.69311,0.0,18.1,0.0,0.713,6.376,88.4,2.5671,24.0,666.0,20.2,391.43,14.65,17.7 +242,0.1029,30.0,4.93,0.0,0.428,6.358,52.9,7.0355,6.0,300.0,16.6,372.75,11.22,22.2 +55,0.01311,90.0,1.22,0.0,0.403,7.249,21.9,8.6966,5.0,226.0,17.9,395.93,4.81,35.4 +459,6.80117,0.0,18.1,0.0,0.713,6.081,84.4,2.7175,24.0,666.0,20.2,396.9,14.7,20.0 +286,0.01965,80.0,1.76,0.0,0.385,6.23,31.5,9.0892,1.0,241.0,18.2,341.6,12.93,20.1 +1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6 +440,22.0511,0.0,18.1,0.0,0.74,5.818,92.4,1.8662,24.0,666.0,20.2,391.45,22.11,10.5 +456,4.66883,0.0,18.1,0.0,0.713,5.976,87.9,2.5806,24.0,666.0,20.2,10.48,19.01,12.7 +157,1.22358,0.0,19.58,0.0,0.605,6.943,97.4,1.8773,5.0,403.0,14.7,363.43,4.59,41.3 +60,0.14932,25.0,5.13,0.0,0.453,5.741,66.2,7.2254,8.0,284.0,19.7,395.11,13.15,18.7 
+111,0.10084,0.0,10.01,0.0,0.547,6.715,81.6,2.6775,6.0,432.0,17.8,395.59,10.16,22.8 +24,0.75026,0.0,8.14,0.0,0.538,5.924,94.1,4.3996,4.0,307.0,21.0,394.33,16.3,15.6 +20,1.25179,0.0,8.14,0.0,0.538,5.57,98.1,3.7979,4.0,307.0,21.0,376.57,21.02,13.6 +32,1.38799,0.0,8.14,0.0,0.538,5.95,82.0,3.99,4.0,307.0,21.0,232.6,27.71,13.2 +276,0.10469,40.0,6.41,1.0,0.447,7.267,49.0,4.7872,4.0,254.0,17.6,389.25,6.05,33.2 +191,0.06911,45.0,3.44,0.0,0.437,6.739,30.8,6.4798,5.0,398.0,15.2,389.71,4.69,30.5 +480,5.82401,0.0,18.1,0.0,0.532,6.242,64.7,3.4242,24.0,666.0,20.2,396.9,10.74,23.0 +485,3.67367,0.0,18.1,0.0,0.583,6.312,51.9,3.9917,24.0,666.0,20.2,388.62,10.58,21.2 +141,1.62864,0.0,21.89,0.0,0.624,5.019,100.0,1.4394,4.0,437.0,21.2,396.9,34.41,14.4 +493,0.17331,0.0,9.69,0.0,0.585,5.707,54.0,2.3817,6.0,391.0,19.2,396.9,12.01,21.8 +70,0.08826,0.0,10.81,0.0,0.413,6.417,6.6,5.2873,4.0,305.0,19.2,383.73,6.72,24.2 +269,0.09065,20.0,6.96,1.0,0.464,5.92,61.5,3.9175,3.0,223.0,18.6,391.34,13.65,20.7 +65,0.03584,80.0,3.37,0.0,0.398,6.29,17.8,6.6115,4.0,337.0,16.1,396.9,4.67,23.5 +195,0.01381,80.0,0.46,0.0,0.422,7.875,32.0,5.6484,4.0,255.0,14.4,394.23,2.97,50.0 +3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4 +433,5.58107,0.0,18.1,0.0,0.713,6.436,87.9,2.3158,24.0,666.0,20.2,100.19,16.22,14.3 +431,10.0623,0.0,18.1,0.0,0.584,6.833,94.3,2.0882,24.0,666.0,20.2,81.33,19.69,14.1 +87,0.07151,0.0,4.49,0.0,0.449,6.121,56.8,3.7476,3.0,247.0,18.5,395.15,8.44,22.2 +62,0.11027,25.0,5.13,0.0,0.453,6.456,67.8,7.2255,8.0,284.0,19.7,396.9,6.73,22.2 +108,0.12802,0.0,8.56,0.0,0.52,6.474,97.1,2.4329,5.0,384.0,20.9,395.24,12.27,19.8 +393,8.64476,0.0,18.1,0.0,0.693,6.193,92.6,1.7912,24.0,666.0,20.2,396.9,15.17,13.8 +398,38.3518,0.0,18.1,0.0,0.693,5.453,100.0,1.4896,24.0,666.0,20.2,396.9,30.59,5.0 +132,0.59005,0.0,21.89,0.0,0.624,6.372,97.9,2.3274,4.0,437.0,21.2,385.76,11.12,23.0 +241,0.10612,30.0,4.93,0.0,0.428,6.095,65.1,6.3361,6.0,300.0,16.6,394.62,12.4,20.1 
+14,0.63796,0.0,8.14,0.0,0.538,6.096,84.5,4.4619,4.0,307.0,21.0,380.02,10.26,18.2 +83,0.03551,25.0,4.86,0.0,0.426,6.167,46.7,5.4007,4.0,281.0,19.0,390.64,7.51,22.9 +284,0.00906,90.0,2.97,0.0,0.4,7.088,20.8,7.3073,1.0,285.0,15.3,394.72,7.85,32.2 +353,0.01709,90.0,2.02,0.0,0.41,6.728,36.1,12.1265,5.0,187.0,17.0,384.46,4.5,30.1 +90,0.04684,0.0,3.41,0.0,0.489,6.417,66.1,3.0923,2.0,270.0,17.8,392.18,8.81,22.6 +414,45.7461,0.0,18.1,0.0,0.693,4.519,100.0,1.6582,24.0,666.0,20.2,88.27,36.98,7.0 +257,0.61154,20.0,3.97,0.0,0.647,8.704,86.9,1.801,5.0,264.0,13.0,389.7,5.12,50.0 +313,0.26938,0.0,9.9,0.0,0.544,6.266,82.8,3.2628,4.0,304.0,18.4,393.39,7.9,21.6 +438,13.6781,0.0,18.1,0.0,0.74,5.935,87.9,1.8206,24.0,666.0,20.2,68.95,34.02,8.4 +159,1.42502,0.0,19.58,0.0,0.871,6.51,100.0,1.7659,5.0,403.0,14.7,364.31,7.39,23.3 +36,0.09744,0.0,5.96,0.0,0.499,5.841,61.4,3.3779,5.0,279.0,19.2,377.56,11.41,20.0 +283,0.01501,90.0,1.21,1.0,0.401,7.923,24.8,5.885,1.0,198.0,13.6,395.52,3.16,50.0 +126,0.38735,0.0,25.65,0.0,0.581,5.613,95.6,1.7572,2.0,188.0,19.1,359.29,27.26,15.7 +369,5.66998,0.0,18.1,1.0,0.631,6.683,96.8,1.3567,24.0,666.0,20.2,375.33,3.73,50.0 +264,0.55007,20.0,3.97,0.0,0.647,7.206,91.6,1.9301,5.0,264.0,13.0,387.89,8.1,36.5 +325,0.19186,0.0,7.38,0.0,0.493,6.431,14.7,5.4159,5.0,287.0,19.6,393.68,5.08,24.6 +399,9.91655,0.0,18.1,0.0,0.693,5.852,77.8,1.5004,24.0,666.0,20.2,338.16,29.97,6.3 +449,7.52601,0.0,18.1,0.0,0.713,6.417,98.3,2.185,24.0,666.0,20.2,304.21,19.31,13.0 +436,14.4208,0.0,18.1,0.0,0.74,6.461,93.3,2.0026,24.0,666.0,20.2,27.49,18.05,9.6 +80,0.04113,25.0,4.86,0.0,0.426,6.727,33.5,5.4007,4.0,281.0,19.0,396.9,5.29,28.0 +220,0.35809,0.0,6.2,1.0,0.507,6.951,88.5,2.8617,8.0,307.0,17.4,391.7,9.71,26.7 +93,0.02875,28.0,15.04,0.0,0.464,6.211,28.9,3.6659,4.0,270.0,18.2,396.33,6.21,25.0 +363,4.22239,0.0,18.1,1.0,0.77,5.803,89.0,1.9047,24.0,666.0,20.2,353.04,14.64,16.8 +258,0.66351,20.0,3.97,0.0,0.647,7.333,100.0,1.8946,5.0,264.0,13.0,383.29,7.79,36.0 
+198,0.03768,80.0,1.52,0.0,0.404,7.274,38.3,7.309,2.0,329.0,12.6,392.2,6.62,34.6 +460,4.81213,0.0,18.1,0.0,0.713,6.701,90.0,2.5975,24.0,666.0,20.2,255.23,16.42,16.4 +107,0.13117,0.0,8.56,0.0,0.52,6.127,85.2,2.1224,5.0,384.0,20.9,387.69,14.09,20.4 +125,0.16902,0.0,25.65,0.0,0.581,5.986,88.4,1.9929,2.0,188.0,19.1,385.02,14.81,21.4 +113,0.22212,0.0,10.01,0.0,0.547,6.092,95.4,2.548,6.0,432.0,17.8,396.9,17.09,18.7 +218,0.11069,0.0,13.89,1.0,0.55,5.951,93.8,2.8893,5.0,276.0,16.4,396.9,17.92,21.5 +474,8.05579,0.0,18.1,0.0,0.584,5.427,95.4,2.4298,24.0,666.0,20.2,352.58,18.14,13.8 +247,0.19657,22.0,5.86,0.0,0.431,6.226,79.2,8.0555,7.0,330.0,19.1,376.14,10.15,20.5 +346,0.06162,0.0,4.39,0.0,0.442,5.898,52.3,8.0136,3.0,352.0,18.8,364.61,12.67,17.2 +288,0.0459,52.5,5.32,0.0,0.405,6.315,45.6,7.3172,6.0,293.0,16.6,396.9,7.6,22.3 +115,0.17134,0.0,10.01,0.0,0.547,5.928,88.2,2.4631,6.0,432.0,17.8,344.91,15.76,18.3 +486,5.69175,0.0,18.1,0.0,0.583,6.114,79.8,3.5459,24.0,666.0,20.2,392.68,14.98,19.1 +42,0.1415,0.0,6.91,0.0,0.448,6.169,6.6,5.7209,3.0,233.0,17.9,383.37,5.81,25.3 +481,5.70818,0.0,18.1,0.0,0.532,6.75,74.9,3.3317,24.0,666.0,20.2,393.07,7.74,23.7 +469,13.0751,0.0,18.1,0.0,0.58,5.713,56.7,2.8237,24.0,666.0,20.2,396.9,14.76,20.1 +246,0.33983,22.0,5.86,0.0,0.431,6.108,34.9,8.0555,7.0,330.0,19.1,390.18,9.16,24.3 +250,0.1403,22.0,5.86,0.0,0.431,6.487,13.0,7.3967,7.0,330.0,19.1,396.28,5.9,24.4 diff --git a/Libraries/oneDAL/daal4py_Distributed_LinearRegression/data/distributed_data/linear_regression_train_1.csv b/Libraries/oneDAL/daal4py_Distributed_LinearRegression/data/distributed_data/linear_regression_train_1.csv new file mode 100755 index 0000000000..9bf366e4c2 --- /dev/null +++ b/Libraries/oneDAL/daal4py_Distributed_LinearRegression/data/distributed_data/linear_regression_train_1.csv @@ -0,0 +1,96 @@ +,0,1,2,3,4,5,6,7,8,9,10,11,12,target +244,0.20608,22.0,5.86,0.0,0.431,5.593,76.5,7.9549,7.0,330.0,19.1,372.49,12.5,17.6 
+94,0.04294,28.0,15.04,0.0,0.464,6.249,77.3,3.615,4.0,270.0,18.2,396.9,10.59,20.6 +291,0.07886,80.0,4.95,0.0,0.411,7.148,27.7,5.1167,4.0,245.0,19.2,396.9,3.56,37.3 +446,6.28807,0.0,18.1,0.0,0.74,6.341,96.4,2.072,24.0,666.0,20.2,318.01,17.79,14.9 +373,11.1081,0.0,18.1,0.0,0.668,4.906,100.0,1.1742,24.0,666.0,20.2,396.9,34.77,13.8 +358,5.20177,0.0,18.1,1.0,0.77,6.127,83.4,2.7227,24.0,666.0,20.2,395.43,11.48,22.7 +327,0.24103,0.0,7.38,0.0,0.493,6.083,43.7,5.4159,5.0,287.0,19.6,396.9,12.79,22.2 +8,0.21124,12.5,7.87,0.0,0.524,5.631,100.0,6.0821,5.0,311.0,15.2,386.63,29.93,16.5 +74,0.07896,0.0,12.83,0.0,0.437,6.273,6.0,4.2515,5.0,398.0,18.7,394.92,6.78,24.1 +184,0.08308,0.0,2.46,0.0,0.488,5.604,89.8,2.9879,3.0,193.0,17.8,391.0,13.98,26.4 +149,2.73397,0.0,19.58,0.0,0.871,5.597,94.9,1.5257,5.0,403.0,14.7,351.85,21.45,15.4 +49,0.21977,0.0,6.91,0.0,0.448,5.602,62.0,6.0877,3.0,233.0,17.9,396.9,16.2,19.4 +402,9.59571,0.0,18.1,0.0,0.693,6.404,100.0,1.639,24.0,666.0,20.2,376.11,20.31,12.1 +11,0.11747,12.5,7.87,0.0,0.524,6.009,82.9,6.2267,5.0,311.0,15.2,396.9,13.27,18.9 +145,2.37934,0.0,19.58,0.0,0.871,6.13,100.0,1.4191,5.0,403.0,14.7,172.91,27.8,13.8 +501,0.06263,0.0,11.93,0.0,0.573,6.593,69.1,2.4786,1.0,273.0,21.0,391.99,9.67,22.4 +41,0.12744,0.0,6.91,0.0,0.448,6.77,2.9,5.7209,3.0,233.0,17.9,385.41,4.84,26.6 +385,16.8118,0.0,18.1,0.0,0.7,5.277,98.1,1.4261,24.0,666.0,20.2,396.9,30.81,7.2 +97,0.12083,0.0,2.89,0.0,0.445,8.069,76.0,3.4952,2.0,276.0,18.0,396.9,4.21,38.7 +161,1.46336,0.0,19.58,0.0,0.605,7.489,90.8,1.9709,5.0,403.0,14.7,374.43,1.73,50.0 +435,11.1604,0.0,18.1,0.0,0.74,6.629,94.6,2.1247,24.0,666.0,20.2,109.85,23.27,13.4 +349,0.02899,40.0,1.25,0.0,0.429,6.939,34.5,8.7921,1.0,335.0,19.7,389.85,5.89,26.6 +217,0.07013,0.0,13.89,0.0,0.55,6.642,85.1,3.4211,5.0,276.0,16.4,392.78,9.69,28.7 +160,1.27346,0.0,19.58,1.0,0.605,6.25,92.6,1.7984,5.0,403.0,14.7,338.92,5.5,27.0 +375,19.6091,0.0,18.1,0.0,0.671,7.313,97.9,1.3163,24.0,666.0,20.2,396.9,13.44,15.0 
+304,0.05515,33.0,2.18,0.0,0.472,7.236,41.1,4.022,7.0,222.0,18.4,393.68,6.93,36.1 +51,0.04337,21.0,5.64,0.0,0.439,6.115,63.0,6.8147,4.0,243.0,16.8,393.97,9.43,20.5 +338,0.03306,0.0,5.19,0.0,0.515,6.059,37.3,4.8122,5.0,224.0,20.2,396.14,8.51,20.6 +266,0.7857,20.0,3.97,0.0,0.647,7.014,84.6,2.1329,5.0,264.0,13.0,384.07,14.79,30.7 +305,0.05479,33.0,2.18,0.0,0.472,6.616,58.1,3.37,7.0,222.0,18.4,393.36,8.93,28.4 +183,0.10008,0.0,2.46,0.0,0.488,6.563,95.6,2.847,3.0,193.0,17.8,396.9,5.68,32.5 +344,0.03049,55.0,3.78,0.0,0.484,6.874,28.1,6.4654,5.0,370.0,17.6,387.97,4.61,31.2 +484,2.37857,0.0,18.1,0.0,0.583,5.871,41.9,3.724,24.0,666.0,20.2,370.73,13.34,20.6 +401,14.2362,0.0,18.1,0.0,0.693,6.343,100.0,1.5741,24.0,666.0,20.2,396.9,20.32,7.2 +89,0.05302,0.0,3.41,0.0,0.489,7.079,63.1,3.4145,2.0,270.0,17.8,396.06,5.7,28.7 +467,4.42228,0.0,18.1,0.0,0.584,6.003,94.5,2.5403,24.0,666.0,20.2,331.29,21.32,19.1 +490,0.20746,0.0,27.74,0.0,0.609,5.093,98.0,1.8226,4.0,711.0,20.1,318.43,29.68,8.1 +410,51.1358,0.0,18.1,0.0,0.597,5.757,100.0,1.413,24.0,666.0,20.2,2.6,10.11,15.0 +61,0.17171,25.0,5.13,0.0,0.453,5.966,93.4,6.8185,8.0,284.0,19.7,378.08,14.44,16.0 +194,0.01439,60.0,2.93,0.0,0.401,6.604,18.8,6.2196,1.0,265.0,15.6,376.7,4.38,29.1 +448,9.32909,0.0,18.1,0.0,0.713,6.185,98.7,2.2616,24.0,666.0,20.2,396.9,18.13,14.1 +234,0.44791,0.0,6.2,1.0,0.507,6.726,66.5,3.6519,8.0,307.0,17.4,360.2,8.05,29.0 +129,0.88125,0.0,21.89,0.0,0.624,5.637,94.7,1.9799,4.0,437.0,21.2,396.9,18.34,14.3 +294,0.08199,0.0,13.92,0.0,0.437,6.009,42.3,5.5027,4.0,289.0,16.0,396.9,10.4,21.7 +239,0.09252,30.0,4.93,0.0,0.428,6.606,42.2,6.1899,6.0,300.0,16.6,383.78,7.37,23.3 +386,24.3938,0.0,18.1,0.0,0.7,4.652,100.0,1.4672,24.0,666.0,20.2,396.9,28.28,10.5 +289,0.04297,52.5,5.32,0.0,0.405,6.565,22.9,7.3172,6.0,293.0,16.6,371.72,9.51,24.8 +427,37.6619,0.0,18.1,0.0,0.679,6.202,78.7,1.8629,24.0,666.0,20.2,18.82,14.52,10.9 +323,0.28392,0.0,7.38,0.0,0.493,5.708,74.3,4.7211,5.0,287.0,19.6,391.13,11.74,18.5 
+343,0.02543,55.0,3.78,0.0,0.484,6.696,56.4,5.7321,5.0,370.0,17.6,396.9,7.18,23.9 +432,6.44405,0.0,18.1,0.0,0.584,6.425,74.8,2.2004,24.0,666.0,20.2,97.95,12.03,16.1 +356,8.98296,0.0,18.1,1.0,0.77,6.212,97.4,2.1222,24.0,666.0,20.2,377.73,17.6,17.8 +64,0.01951,17.5,1.38,0.0,0.4161,7.104,59.5,9.2229,3.0,216.0,18.6,393.24,8.05,33.0 +335,0.03961,0.0,5.19,0.0,0.515,6.037,34.5,5.9853,5.0,224.0,20.2,396.9,8.01,21.1 +18,0.80271,0.0,8.14,0.0,0.538,5.456,36.6,3.7965,4.0,307.0,21.0,288.99,11.69,20.2 +109,0.26363,0.0,8.56,0.0,0.52,6.229,91.2,2.5451,5.0,384.0,20.9,391.23,15.55,19.4 +397,7.67202,0.0,18.1,0.0,0.693,5.747,98.9,1.6334,24.0,666.0,20.2,393.1,19.92,8.5 +75,0.09512,0.0,12.83,0.0,0.437,6.286,45.0,4.5026,5.0,398.0,18.7,383.23,8.94,21.4 +130,0.34006,0.0,21.89,0.0,0.624,6.458,98.9,2.1185,4.0,437.0,21.2,395.04,12.6,19.2 +63,0.1265,25.0,5.13,0.0,0.453,6.762,43.4,7.9809,8.0,284.0,19.7,395.58,9.5,25.0 +27,0.95577,0.0,8.14,0.0,0.538,6.047,88.8,4.4534,4.0,307.0,21.0,306.38,17.28,14.8 +30,1.13081,0.0,8.14,0.0,0.538,5.713,94.1,4.233,4.0,307.0,21.0,360.17,22.6,12.7 +238,0.08244,30.0,4.93,0.0,0.428,6.481,18.5,6.1899,6.0,300.0,16.6,379.41,6.36,23.7 +470,4.34879,0.0,18.1,0.0,0.58,6.167,84.0,3.0334,24.0,666.0,20.2,396.9,16.29,19.9 +471,4.03841,0.0,18.1,0.0,0.532,6.229,90.7,3.0993,24.0,666.0,20.2,395.33,12.87,19.6 +45,0.17142,0.0,6.91,0.0,0.448,5.682,33.8,5.1004,3.0,233.0,17.9,396.9,10.21,19.3 +224,0.31533,0.0,6.2,0.0,0.504,8.266,78.3,2.8944,8.0,307.0,17.4,385.05,4.14,44.8 +296,0.05372,0.0,13.92,0.0,0.437,6.549,51.0,5.9604,4.0,289.0,16.0,392.85,7.39,27.1 +185,0.06047,0.0,2.46,0.0,0.488,6.153,68.8,3.2797,3.0,193.0,17.8,387.11,13.15,29.6 +457,8.20058,0.0,18.1,0.0,0.713,5.936,80.3,2.7792,24.0,666.0,20.2,3.5,16.94,13.5 +249,0.19073,22.0,5.86,0.0,0.431,6.718,17.5,7.8265,7.0,330.0,19.1,393.74,6.56,26.2 +124,0.09849,0.0,25.65,0.0,0.581,5.879,95.8,2.0063,2.0,188.0,19.1,379.38,17.58,18.8 +103,0.21161,0.0,8.56,0.0,0.52,6.137,87.4,2.7147,5.0,384.0,20.9,394.47,13.44,19.3 
+67,0.05789,12.5,6.07,0.0,0.409,5.878,21.4,6.498,4.0,345.0,18.9,396.21,8.1,22.0 +59,0.10328,25.0,5.13,0.0,0.453,5.927,47.2,6.932,8.0,284.0,19.7,396.9,9.22,19.6 +497,0.26838,0.0,9.69,0.0,0.585,5.794,70.6,2.8927,6.0,391.0,19.2,396.9,14.1,18.3 +378,23.6482,0.0,18.1,0.0,0.671,6.38,96.2,1.3861,24.0,666.0,20.2,396.9,23.69,13.1 +468,15.5757,0.0,18.1,0.0,0.58,5.926,71.0,2.9084,24.0,666.0,20.2,368.74,18.13,19.1 +372,8.26725,0.0,18.1,1.0,0.668,5.875,89.6,1.1296,24.0,666.0,20.2,347.88,8.88,50.0 +418,73.5341,0.0,18.1,0.0,0.679,5.957,100.0,1.8026,24.0,666.0,20.2,16.45,20.62,8.8 +212,0.21719,0.0,10.59,1.0,0.489,5.807,53.8,3.6526,4.0,277.0,18.6,390.94,16.03,22.4 +253,0.36894,22.0,5.86,0.0,0.431,8.259,8.4,8.9067,7.0,330.0,19.1,396.9,3.54,42.8 +261,0.53412,20.0,3.97,0.0,0.647,7.52,89.4,2.1398,5.0,264.0,13.0,388.37,7.26,43.1 +447,9.92485,0.0,18.1,0.0,0.74,6.251,96.6,2.198,24.0,666.0,20.2,388.52,16.44,12.6 +502,0.04527,0.0,11.93,0.0,0.573,6.12,76.7,2.2875,1.0,273.0,21.0,396.9,9.08,20.6 +204,0.02009,95.0,2.68,0.0,0.4161,8.034,31.9,5.118,4.0,224.0,14.7,390.55,2.88,50.0 +10,0.22489,12.5,7.87,0.0,0.524,6.377,94.3,6.3467,5.0,311.0,15.2,392.52,20.45,15.0 +98,0.08187,0.0,2.89,0.0,0.445,7.82,36.9,3.4952,2.0,276.0,18.0,393.53,3.57,43.8 +34,1.61282,0.0,8.14,0.0,0.538,6.096,96.9,3.7598,4.0,307.0,21.0,248.31,20.34,13.5 +422,12.0482,0.0,18.1,0.0,0.614,5.648,87.6,1.9512,24.0,666.0,20.2,291.55,14.1,20.8 +92,0.04203,28.0,15.04,0.0,0.464,6.442,53.6,3.6659,4.0,270.0,18.2,395.01,8.16,22.9 +221,0.40771,0.0,6.2,1.0,0.507,6.164,91.3,3.048,8.0,307.0,17.4,395.24,21.46,21.7 +366,3.69695,0.0,18.1,0.0,0.718,4.963,91.4,1.7523,24.0,666.0,20.2,316.03,14.0,21.9 +270,0.29916,20.0,6.96,0.0,0.464,5.856,42.1,4.429,3.0,223.0,18.6,388.65,13.0,21.1 +82,0.03659,25.0,4.86,0.0,0.426,6.302,32.2,5.4007,4.0,281.0,19.0,396.9,6.72,24.8 diff --git a/Libraries/oneDAL/daal4py_Distributed_LinearRegression/data/distributed_data/linear_regression_train_2.csv 
b/Libraries/oneDAL/daal4py_Distributed_LinearRegression/data/distributed_data/linear_regression_train_2.csv new file mode 100755 index 0000000000..c4f0195720 --- /dev/null +++ b/Libraries/oneDAL/daal4py_Distributed_LinearRegression/data/distributed_data/linear_regression_train_2.csv @@ -0,0 +1,96 @@ +,0,1,2,3,4,5,6,7,8,9,10,11,12,target +153,2.14918,0.0,19.58,0.0,0.871,5.709,98.5,1.6232,5.0,403.0,14.7,261.95,15.79,19.4 +453,8.24809,0.0,18.1,0.0,0.713,7.393,99.3,2.4527,24.0,666.0,20.2,375.87,16.74,17.8 +6,0.08829,12.5,7.87,0.0,0.524,6.012,66.6,5.5605,5.0,311.0,15.2,395.6,12.43,22.9 +19,0.7258,0.0,8.14,0.0,0.538,5.727,69.5,3.7965,4.0,307.0,21.0,390.95,11.28,18.2 +492,0.11132,0.0,27.74,0.0,0.609,5.983,83.5,2.1099,4.0,711.0,20.1,396.9,13.35,20.1 +229,0.44178,0.0,6.2,0.0,0.504,6.552,21.4,3.3751,8.0,307.0,17.4,380.34,3.76,31.5 +201,0.03445,82.5,2.03,0.0,0.415,6.162,38.4,6.27,2.0,348.0,14.7,393.77,7.43,24.1 +193,0.02187,60.0,2.93,0.0,0.401,6.8,9.9,6.2196,1.0,265.0,15.6,393.37,5.03,31.1 +79,0.08387,0.0,12.83,0.0,0.437,5.874,36.6,4.5026,5.0,398.0,18.7,396.06,9.1,20.3 +479,14.3337,0.0,18.1,0.0,0.614,6.229,88.0,1.9512,24.0,666.0,20.2,383.32,13.11,21.4 +299,0.05561,70.0,2.24,0.0,0.4,7.041,10.0,7.8278,5.0,358.0,14.8,371.58,4.74,29.0 +403,24.8017,0.0,18.1,0.0,0.693,5.349,96.0,1.7028,24.0,666.0,20.2,396.9,19.77,8.3 +482,5.73116,0.0,18.1,0.0,0.532,7.061,77.0,3.4106,24.0,666.0,20.2,395.28,7.01,25.0 +39,0.02763,75.0,2.95,0.0,0.428,6.595,21.8,5.4011,3.0,252.0,18.3,395.63,4.32,30.8 +169,2.44953,0.0,19.58,0.0,0.605,6.402,95.2,2.2625,5.0,403.0,14.7,330.04,11.32,22.3 +172,0.13914,0.0,4.05,0.0,0.51,5.572,88.5,2.5961,5.0,296.0,16.6,396.9,14.69,23.1 +213,0.14052,0.0,10.59,0.0,0.489,6.375,32.3,3.9454,4.0,277.0,18.6,385.81,9.38,28.1 +442,5.66637,0.0,18.1,0.0,0.74,6.219,100.0,2.0048,24.0,666.0,20.2,395.69,16.59,18.4 +395,8.71675,0.0,18.1,0.0,0.693,6.471,98.8,1.7257,24.0,666.0,20.2,391.98,17.12,13.1 +345,0.03113,0.0,4.39,0.0,0.442,6.014,48.5,8.0136,3.0,352.0,18.8,385.64,10.53,17.5 
+321,0.18159,0.0,7.38,0.0,0.493,6.376,54.3,4.5404,5.0,287.0,19.6,396.9,6.87,23.1 +211,0.37578,0.0,10.59,1.0,0.489,5.404,88.6,3.665,4.0,277.0,18.6,395.24,23.98,19.3 +348,0.01501,80.0,2.01,0.0,0.435,6.635,29.7,8.344,4.0,280.0,17.0,390.94,5.99,24.5 +473,4.64689,0.0,18.1,0.0,0.614,6.98,67.6,2.5329,24.0,666.0,20.2,374.68,11.66,29.8 +219,0.11425,0.0,13.89,1.0,0.55,6.373,92.4,3.3633,5.0,276.0,16.4,393.74,10.5,23.0 +37,0.08014,0.0,5.96,0.0,0.499,5.85,41.5,3.9342,5.0,279.0,19.2,396.9,8.77,21.0 +341,0.01301,35.0,1.52,0.0,0.442,7.241,49.3,7.0379,1.0,284.0,15.5,394.74,5.49,32.7 +391,5.29305,0.0,18.1,0.0,0.7,6.051,82.5,2.1678,24.0,666.0,20.2,378.38,18.76,23.2 +96,0.11504,0.0,2.89,0.0,0.445,6.163,69.6,3.4952,2.0,276.0,18.0,391.83,11.34,21.4 +240,0.11329,30.0,4.93,0.0,0.428,6.897,54.3,6.3361,6.0,300.0,16.6,391.25,11.38,22.0 +118,0.13058,0.0,10.01,0.0,0.547,5.872,73.1,2.4775,6.0,432.0,17.8,338.63,15.37,20.4 +355,0.10659,80.0,1.91,0.0,0.413,5.936,19.5,10.5857,4.0,334.0,22.0,376.04,5.57,20.6 +406,20.7162,0.0,18.1,0.0,0.659,4.138,100.0,1.1781,24.0,666.0,20.2,370.22,23.34,11.9 +180,0.06588,0.0,2.46,0.0,0.488,7.765,83.3,2.741,3.0,193.0,17.8,395.56,7.56,39.8 +114,0.14231,0.0,10.01,0.0,0.547,6.254,84.2,2.2565,6.0,432.0,17.8,388.74,10.45,18.5 +400,25.0461,0.0,18.1,0.0,0.693,5.987,100.0,1.5888,24.0,666.0,20.2,396.9,26.77,5.6 +135,0.55778,0.0,21.89,0.0,0.624,6.335,98.2,2.1107,4.0,437.0,21.2,394.67,16.96,18.1 +99,0.0686,0.0,2.89,0.0,0.445,7.416,62.5,3.4952,2.0,276.0,18.0,396.9,6.19,33.2 +319,0.47547,0.0,9.9,0.0,0.544,6.113,58.8,4.0019,4.0,304.0,18.4,396.23,12.73,21.0 +148,2.33099,0.0,19.58,0.0,0.871,5.186,93.8,1.5296,5.0,403.0,14.7,356.99,28.32,17.8 +504,0.10959,0.0,11.93,0.0,0.573,6.794,89.3,2.3889,1.0,273.0,21.0,393.45,6.48,22.0 +429,9.33889,0.0,18.1,0.0,0.679,6.38,95.6,1.9682,24.0,666.0,20.2,60.72,24.08,9.5 +254,0.04819,80.0,3.64,0.0,0.392,6.108,32.0,9.2203,1.0,315.0,16.4,392.89,6.57,21.9 +362,3.67822,0.0,18.1,0.0,0.77,5.362,96.2,2.1036,24.0,666.0,20.2,380.79,10.19,20.8 
+187,0.07875,45.0,3.44,0.0,0.437,6.782,41.1,3.7886,5.0,398.0,15.2,393.87,6.68,32.0 +274,0.05644,40.0,6.41,1.0,0.447,6.758,32.9,4.0776,4.0,254.0,17.6,396.9,3.53,32.4 +121,0.07165,0.0,25.65,0.0,0.581,6.004,84.1,2.1974,2.0,188.0,19.1,377.67,14.27,20.3 +155,3.53501,0.0,19.58,1.0,0.871,6.152,82.6,1.7455,5.0,403.0,14.7,88.01,15.02,15.6 +77,0.08707,0.0,12.83,0.0,0.437,6.14,45.8,4.0905,5.0,398.0,18.7,386.96,10.27,20.8 +44,0.12269,0.0,6.91,0.0,0.448,6.069,40.0,5.7209,3.0,233.0,17.9,389.39,9.55,21.2 +158,1.34284,0.0,19.58,0.0,0.605,6.066,100.0,1.7573,5.0,403.0,14.7,353.89,6.43,24.3 +487,4.83567,0.0,18.1,0.0,0.583,5.905,53.2,3.1523,24.0,666.0,20.2,388.22,11.45,20.6 +189,0.0837,45.0,3.44,0.0,0.437,7.185,38.9,4.5667,5.0,398.0,15.2,396.9,5.39,34.9 +206,0.22969,0.0,10.59,0.0,0.489,6.326,52.5,4.3549,4.0,277.0,18.6,394.87,10.97,24.4 +472,3.56868,0.0,18.1,0.0,0.58,6.437,75.0,2.8965,24.0,666.0,20.2,393.37,14.36,23.2 +43,0.15936,0.0,6.91,0.0,0.448,6.211,6.5,5.7209,3.0,233.0,17.9,394.46,7.44,24.7 +156,2.44668,0.0,19.58,0.0,0.871,5.272,94.0,1.7364,5.0,403.0,14.7,88.63,16.14,13.1 +176,0.07022,0.0,4.05,0.0,0.51,6.02,47.2,3.5549,5.0,296.0,16.6,393.23,10.11,23.2 +142,3.32105,0.0,19.58,1.0,0.871,5.403,100.0,1.3216,5.0,403.0,14.7,396.9,26.82,13.4 +123,0.15038,0.0,25.65,0.0,0.581,5.856,97.0,1.9444,2.0,188.0,19.1,370.31,25.41,17.3 +164,2.24236,0.0,19.58,0.0,0.605,5.854,91.8,2.422,5.0,403.0,14.7,395.11,11.64,22.7 +100,0.14866,0.0,8.56,0.0,0.52,6.727,79.9,2.7778,5.0,384.0,20.9,394.76,9.42,27.5 +424,8.79212,0.0,18.1,0.0,0.584,5.565,70.6,2.0635,24.0,666.0,20.2,3.65,17.16,11.7 +336,0.03427,0.0,5.19,0.0,0.515,5.869,46.3,5.2311,5.0,224.0,20.2,396.9,9.8,19.5 +243,0.12757,30.0,4.93,0.0,0.428,6.393,7.8,7.0355,6.0,300.0,16.6,374.71,5.19,23.7 +421,7.02259,0.0,18.1,0.0,0.718,6.006,95.3,1.8746,24.0,666.0,20.2,319.98,15.7,14.2 +317,0.24522,0.0,9.9,0.0,0.544,5.782,71.7,4.0317,4.0,304.0,18.4,396.9,15.94,19.8 +331,0.05023,35.0,6.06,0.0,0.4379,5.706,28.4,6.6407,1.0,304.0,16.9,394.02,12.43,17.1 
+505,0.04741,0.0,11.93,0.0,0.573,6.03,80.8,2.505,1.0,273.0,21.0,396.9,7.88,11.9 +68,0.13554,12.5,6.07,0.0,0.409,5.594,36.8,6.498,4.0,345.0,18.9,396.9,13.09,17.4 +166,2.01019,0.0,19.58,0.0,0.605,7.929,96.2,2.0459,5.0,403.0,14.7,369.3,3.7,50.0 +259,0.65665,20.0,3.97,0.0,0.647,6.842,100.0,2.0107,5.0,264.0,13.0,391.93,6.9,30.1 +78,0.05646,0.0,12.83,0.0,0.437,6.232,53.7,5.0141,5.0,398.0,18.7,386.4,12.34,21.2 +322,0.35114,0.0,7.38,0.0,0.493,6.041,49.9,4.7211,5.0,287.0,19.6,396.9,7.7,20.4 +273,0.22188,20.0,6.96,1.0,0.464,7.691,51.8,4.3665,3.0,223.0,18.6,390.77,6.58,35.2 +287,0.03871,52.5,5.32,0.0,0.405,6.209,31.3,7.3172,6.0,293.0,16.6,396.9,7.14,23.2 +278,0.07978,40.0,6.41,0.0,0.447,6.482,32.1,4.1403,4.0,254.0,17.6,396.9,7.19,29.1 +85,0.05735,0.0,4.49,0.0,0.449,6.63,56.1,4.4377,3.0,247.0,18.5,392.3,6.53,26.6 +188,0.12579,45.0,3.44,0.0,0.437,6.556,29.1,4.5667,5.0,398.0,15.2,382.84,4.56,29.8 +295,0.12932,0.0,13.92,0.0,0.437,6.678,31.1,5.9604,4.0,289.0,16.0,396.9,6.27,28.6 +359,4.26131,0.0,18.1,0.0,0.77,6.112,81.3,2.5091,24.0,666.0,20.2,390.74,12.67,22.6 +494,0.27957,0.0,9.69,0.0,0.585,5.926,42.6,2.3817,6.0,391.0,19.2,396.9,13.59,24.5 +463,5.82115,0.0,18.1,0.0,0.713,6.513,89.9,2.8016,24.0,666.0,20.2,393.82,10.29,20.2 +277,0.06127,40.0,6.41,1.0,0.447,6.826,27.6,4.8628,4.0,254.0,17.6,393.45,4.16,33.1 +143,4.0974,0.0,19.58,0.0,0.871,5.468,100.0,1.4118,5.0,403.0,14.7,396.9,26.42,15.6 +364,3.47428,0.0,18.1,1.0,0.718,8.78,82.9,1.9047,24.0,666.0,20.2,354.55,5.29,21.9 +466,3.77498,0.0,18.1,0.0,0.655,5.952,84.7,2.8715,24.0,666.0,20.2,22.01,17.15,19.0 +280,0.03578,20.0,3.33,0.0,0.4429,7.82,64.5,4.6947,5.0,216.0,14.9,387.31,3.76,45.4 +382,9.18702,0.0,18.1,0.0,0.7,5.536,100.0,1.5804,24.0,666.0,20.2,396.9,23.6,11.3 +441,9.72418,0.0,18.1,0.0,0.74,6.406,97.2,2.0651,24.0,666.0,20.2,385.96,19.52,17.1 +352,0.07244,60.0,1.69,0.0,0.411,5.884,18.5,10.7103,4.0,411.0,18.3,392.33,7.79,18.6 +56,0.02055,85.0,0.74,0.0,0.41,6.383,35.7,9.1876,2.0,313.0,17.3,396.9,5.77,24.7 
+478,10.233,0.0,18.1,0.0,0.614,6.185,96.7,2.1705,24.0,666.0,20.2,379.7,18.03,14.6 +196,0.04011,80.0,1.52,0.0,0.404,7.287,34.1,7.309,2.0,329.0,12.6,396.9,4.08,33.3 +154,1.41385,0.0,19.58,1.0,0.871,6.129,96.0,1.7494,5.0,403.0,14.7,321.02,15.12,17.0 diff --git a/Libraries/oneDAL/daal4py_Distributed_LinearRegression/data/distributed_data/linear_regression_train_3.csv b/Libraries/oneDAL/daal4py_Distributed_LinearRegression/data/distributed_data/linear_regression_train_3.csv new file mode 100755 index 0000000000..72ff937fd8 --- /dev/null +++ b/Libraries/oneDAL/daal4py_Distributed_LinearRegression/data/distributed_data/linear_regression_train_3.csv @@ -0,0 +1,96 @@ +,0,1,2,3,4,5,6,7,8,9,10,11,12,target +379,17.8667,0.0,18.1,0.0,0.671,6.223,100.0,1.3861,24.0,666.0,20.2,393.74,21.78,10.2 +350,0.06211,40.0,1.25,0.0,0.429,6.49,44.4,8.7921,1.0,335.0,19.7,396.9,5.98,22.9 +408,7.40389,0.0,18.1,0.0,0.597,5.617,97.9,1.4547,24.0,666.0,20.2,314.64,26.4,17.2 +237,0.51183,0.0,6.2,0.0,0.507,7.358,71.6,4.148,8.0,307.0,17.4,390.07,4.73,31.5 +182,0.09103,0.0,2.46,0.0,0.488,7.155,92.2,2.7006,3.0,193.0,17.8,394.12,4.82,37.9 +66,0.04379,80.0,3.37,0.0,0.398,5.787,31.1,6.6115,4.0,337.0,16.1,396.9,10.24,19.4 +40,0.03359,75.0,2.95,0.0,0.428,7.024,15.8,5.4011,3.0,252.0,18.3,395.62,1.98,34.9 +360,4.54192,0.0,18.1,0.0,0.77,6.398,88.0,2.5182,24.0,666.0,20.2,374.56,7.79,25.0 +260,0.54011,20.0,3.97,0.0,0.647,7.203,81.8,2.1121,5.0,264.0,13.0,392.8,9.59,33.8 +443,9.96654,0.0,18.1,0.0,0.74,6.485,100.0,1.9784,24.0,666.0,20.2,386.73,18.85,15.4 +496,0.2896,0.0,9.69,0.0,0.585,5.39,72.9,2.7986,6.0,391.0,19.2,396.9,21.14,19.7 +28,0.77299,0.0,8.14,0.0,0.538,6.495,94.4,4.4547,4.0,307.0,21.0,387.94,12.8,18.4 +177,0.05425,0.0,4.05,0.0,0.51,6.315,73.4,3.3175,5.0,296.0,16.6,395.6,6.29,24.6 +420,11.0874,0.0,18.1,0.0,0.718,6.411,100.0,1.8589,24.0,666.0,20.2,318.75,15.02,16.7 +106,0.1712,0.0,8.56,0.0,0.52,5.836,91.9,2.211,5.0,384.0,20.9,395.67,18.66,19.5 
+69,0.12816,12.5,6.07,0.0,0.409,5.885,33.0,6.498,4.0,345.0,18.9,396.9,8.79,20.9 +439,9.39063,0.0,18.1,0.0,0.74,5.627,93.9,1.8172,24.0,666.0,20.2,396.9,22.88,12.8 +340,0.06151,0.0,5.19,0.0,0.515,5.968,58.5,4.8122,5.0,224.0,20.2,396.9,9.29,18.7 +54,0.0136,75.0,4.0,0.0,0.41,5.888,47.6,7.3197,3.0,469.0,21.1,396.9,14.8,18.9 +151,1.49632,0.0,19.58,0.0,0.871,5.404,100.0,1.5916,5.0,403.0,14.7,341.6,13.28,19.6 +377,9.82349,0.0,18.1,0.0,0.671,6.794,98.8,1.358,24.0,666.0,20.2,396.9,21.24,13.3 +425,15.8603,0.0,18.1,0.0,0.679,5.896,95.4,1.9096,24.0,666.0,20.2,7.68,24.39,8.3 +233,0.33147,0.0,6.2,0.0,0.507,8.247,70.4,3.6519,8.0,307.0,17.4,378.95,3.95,48.3 +320,0.1676,0.0,7.38,0.0,0.493,6.426,52.3,4.5404,5.0,287.0,19.6,396.9,7.2,23.8 +202,0.02177,82.5,2.03,0.0,0.415,7.61,15.7,6.27,2.0,348.0,14.7,395.38,3.11,42.3 +50,0.08873,21.0,5.64,0.0,0.439,5.963,45.7,6.8147,4.0,243.0,16.8,395.56,13.45,19.7 +445,10.6718,0.0,18.1,0.0,0.74,6.459,94.8,1.9879,24.0,666.0,20.2,43.06,23.98,11.8 +465,3.1636,0.0,18.1,0.0,0.655,5.759,48.2,3.0665,24.0,666.0,20.2,334.4,14.13,19.9 +255,0.03548,80.0,3.64,0.0,0.392,5.876,19.1,9.2203,1.0,315.0,16.4,395.18,9.25,20.9 +498,0.23912,0.0,9.69,0.0,0.585,6.019,65.3,2.4091,6.0,391.0,19.2,396.9,12.92,21.2 +488,0.15086,0.0,27.74,0.0,0.609,5.454,92.7,1.8209,4.0,711.0,20.1,395.09,18.06,15.2 +500,0.22438,0.0,9.69,0.0,0.585,6.027,79.7,2.4982,6.0,391.0,19.2,396.9,14.33,16.8 +47,0.22927,0.0,6.91,0.0,0.448,6.03,85.5,5.6894,3.0,233.0,17.9,392.74,18.8,16.6 +231,0.46296,0.0,6.2,0.0,0.504,7.412,76.9,3.6715,8.0,307.0,17.4,376.14,5.25,31.7 +147,2.36862,0.0,19.58,0.0,0.871,4.926,95.7,1.4608,5.0,403.0,14.7,391.71,29.53,14.6 +263,0.82526,20.0,3.97,0.0,0.647,7.327,94.5,2.0788,5.0,264.0,13.0,393.42,11.25,31.0 +0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0 +430,8.49213,0.0,18.1,0.0,0.584,6.348,86.1,2.0527,24.0,666.0,20.2,83.45,17.64,14.5 +282,0.06129,20.0,3.33,1.0,0.4429,7.645,49.7,5.2119,5.0,216.0,14.9,377.07,3.01,46.0 
+203,0.0351,95.0,2.68,0.0,0.4161,7.853,33.2,5.118,4.0,224.0,14.7,392.78,3.81,48.5 +134,0.97617,0.0,21.89,0.0,0.624,5.757,98.4,2.346,4.0,437.0,21.2,262.76,17.31,15.6 +31,1.35472,0.0,8.14,0.0,0.538,6.072,100.0,4.175,4.0,307.0,21.0,376.73,13.04,14.5 +138,0.2498,0.0,21.89,0.0,0.624,5.857,98.2,1.6686,4.0,437.0,21.2,392.04,21.32,13.3 +91,0.03932,0.0,3.41,0.0,0.489,6.405,73.9,3.0921,2.0,270.0,17.8,393.55,8.2,22.0 +21,0.85204,0.0,8.14,0.0,0.538,5.965,89.2,4.0123,4.0,307.0,21.0,392.53,13.83,19.6 +71,0.15876,0.0,10.81,0.0,0.413,5.961,17.5,5.2873,4.0,305.0,19.2,376.94,9.88,21.7 +314,0.3692,0.0,9.9,0.0,0.544,6.567,87.3,3.6023,4.0,304.0,18.4,395.69,9.28,23.8 +152,1.12658,0.0,19.58,1.0,0.871,5.012,88.0,1.6102,5.0,403.0,14.7,343.28,12.12,15.3 +29,1.00245,0.0,8.14,0.0,0.538,6.674,87.3,4.239,4.0,307.0,21.0,380.23,11.98,21.0 +483,2.81838,0.0,18.1,0.0,0.532,5.762,40.3,4.0983,24.0,666.0,20.2,392.92,10.42,21.8 +38,0.17505,0.0,5.96,0.0,0.499,5.966,30.2,3.8473,5.0,279.0,19.2,393.43,10.13,24.7 +119,0.14476,0.0,10.01,0.0,0.547,5.731,65.2,2.7592,6.0,432.0,17.8,391.5,13.61,19.3 +252,0.08221,22.0,5.86,0.0,0.431,6.957,6.8,8.9067,7.0,330.0,19.1,386.09,3.53,29.6 +415,18.0846,0.0,18.1,0.0,0.679,6.434,100.0,1.8347,24.0,666.0,20.2,27.25,29.05,7.2 +389,8.15174,0.0,18.1,0.0,0.7,5.39,98.9,1.7281,24.0,666.0,20.2,396.9,20.85,11.5 +404,41.5292,0.0,18.1,0.0,0.693,5.531,85.4,1.6074,24.0,666.0,20.2,329.46,27.38,8.5 +248,0.16439,22.0,5.86,0.0,0.431,6.433,49.1,7.8265,7.0,330.0,19.1,374.71,9.52,24.5 +190,0.09068,45.0,3.44,0.0,0.437,6.951,21.5,6.4798,5.0,398.0,15.2,377.68,5.1,37.0 +452,5.09017,0.0,18.1,0.0,0.713,6.297,91.8,2.3682,24.0,666.0,20.2,385.09,17.27,16.1 +310,2.63548,0.0,9.9,0.0,0.544,4.973,37.8,2.5194,4.0,304.0,18.4,350.45,12.64,16.1 +170,1.20742,0.0,19.58,0.0,0.605,5.875,94.6,2.4259,5.0,403.0,14.7,292.29,14.43,17.4 +437,15.1772,0.0,18.1,0.0,0.74,6.152,100.0,1.9142,24.0,666.0,20.2,9.32,26.45,8.7 +146,2.15505,0.0,19.58,0.0,0.871,5.628,100.0,1.5166,5.0,403.0,14.7,169.27,16.65,15.6 
+216,0.0456,0.0,13.89,1.0,0.55,5.888,56.0,3.1121,5.0,276.0,16.4,392.8,13.51,23.3 +333,0.05083,0.0,5.19,0.0,0.515,6.316,38.1,6.4584,5.0,224.0,20.2,389.71,5.68,22.2 +311,0.79041,0.0,9.9,0.0,0.544,6.122,52.8,2.6403,4.0,304.0,18.4,396.9,5.98,22.1 +52,0.0536,21.0,5.64,0.0,0.439,6.511,21.1,6.8147,4.0,243.0,16.8,396.9,5.28,25.0 +413,28.6558,0.0,18.1,0.0,0.597,5.155,100.0,1.5894,24.0,666.0,20.2,210.97,20.08,16.3 +35,0.06417,0.0,5.96,0.0,0.499,5.933,68.2,3.3603,5.0,279.0,19.2,396.9,9.68,18.9 +205,0.13642,0.0,10.59,0.0,0.489,5.891,22.3,3.9454,4.0,277.0,18.6,396.9,10.87,22.6 +499,0.17783,0.0,9.69,0.0,0.585,5.569,73.5,2.3999,6.0,391.0,19.2,395.77,15.1,17.5 +307,0.04932,33.0,2.18,0.0,0.472,6.849,70.3,3.1827,7.0,222.0,18.4,396.9,7.53,28.2 +86,0.05188,0.0,4.49,0.0,0.449,6.015,45.1,4.4272,3.0,247.0,18.5,395.99,12.86,22.5 +272,0.1146,20.0,6.96,0.0,0.464,6.538,58.7,3.9175,3.0,223.0,18.6,394.96,7.73,24.4 +451,5.44114,0.0,18.1,0.0,0.713,6.655,98.2,2.3552,24.0,666.0,20.2,355.29,17.73,15.2 +222,0.62356,0.0,6.2,1.0,0.507,6.879,77.7,3.2721,8.0,307.0,17.4,390.39,9.93,27.5 +112,0.12329,0.0,10.01,0.0,0.547,5.913,92.9,2.3534,6.0,432.0,17.8,394.95,16.21,18.8 +167,1.80028,0.0,19.58,0.0,0.605,5.877,79.2,2.4259,5.0,403.0,14.7,227.61,12.14,23.8 +12,0.09378,12.5,7.87,0.0,0.524,5.889,39.0,5.4509,5.0,311.0,15.2,390.5,15.71,21.7 +477,15.0234,0.0,18.1,0.0,0.614,5.304,97.3,2.1007,24.0,666.0,20.2,349.48,24.91,12.0 +267,0.57834,20.0,3.97,0.0,0.575,8.297,67.0,2.4216,5.0,264.0,13.0,384.54,7.44,50.0 +265,0.76162,20.0,3.97,0.0,0.647,5.56,62.8,1.9865,5.0,264.0,13.0,392.4,10.45,22.8 +215,0.19802,0.0,10.59,0.0,0.489,6.182,42.4,3.9454,4.0,277.0,18.6,393.63,9.47,25.0 +105,0.13262,0.0,8.56,0.0,0.52,5.851,96.7,2.1069,5.0,384.0,20.9,394.05,16.47,19.5 +374,18.4982,0.0,18.1,0.0,0.668,4.138,100.0,1.137,24.0,666.0,20.2,396.9,37.97,13.8 +384,20.0849,0.0,18.1,0.0,0.7,4.368,91.2,1.4395,24.0,666.0,20.2,285.83,30.63,8.8 +383,7.99248,0.0,18.1,0.0,0.7,5.52,100.0,1.5331,24.0,666.0,20.2,396.9,24.56,12.3 
+173,0.09178,0.0,4.05,0.0,0.51,6.416,84.1,2.6463,5.0,296.0,16.6,395.5,9.04,23.6 +330,0.04544,0.0,3.24,0.0,0.46,6.144,32.2,5.8736,4.0,430.0,16.9,368.57,9.09,19.8 +434,13.9134,0.0,18.1,0.0,0.713,6.208,95.0,2.2222,24.0,666.0,20.2,100.63,15.17,11.7 +209,0.43571,0.0,10.59,1.0,0.489,5.344,100.0,3.875,4.0,277.0,18.6,396.9,23.09,20.0 +419,11.8123,0.0,18.1,0.0,0.718,6.824,76.5,1.794,24.0,666.0,20.2,48.45,22.74,8.4 +26,0.67191,0.0,8.14,0.0,0.538,5.813,90.3,4.682,4.0,307.0,21.0,376.88,14.81,16.6 +462,6.65492,0.0,18.1,0.0,0.713,6.317,83.0,2.7344,24.0,666.0,20.2,396.9,13.99,19.5 +458,7.75223,0.0,18.1,0.0,0.713,6.301,83.7,2.7831,24.0,666.0,20.2,272.21,16.23,14.9 diff --git a/Libraries/oneDAL/daal4py_Distributed_LinearRegression/data/distributed_data/linear_regression_train_4.csv b/Libraries/oneDAL/daal4py_Distributed_LinearRegression/data/distributed_data/linear_regression_train_4.csv new file mode 100755 index 0000000000..ab41622cc3 --- /dev/null +++ b/Libraries/oneDAL/daal4py_Distributed_LinearRegression/data/distributed_data/linear_regression_train_4.csv @@ -0,0 +1,95 @@ +,0,1,2,3,4,5,6,7,8,9,10,11,12,target +476,4.87141,0.0,18.1,0.0,0.614,6.484,93.6,2.3053,24.0,666.0,20.2,396.21,18.68,16.7 +354,0.04301,80.0,1.91,0.0,0.413,5.663,21.9,10.5857,4.0,334.0,22.0,382.8,8.05,18.2 +101,0.11432,0.0,8.56,0.0,0.52,6.781,71.3,2.8561,5.0,384.0,20.9,395.58,7.67,26.5 +256,0.01538,90.0,3.75,0.0,0.394,7.454,34.2,6.3361,3.0,244.0,15.9,386.34,3.11,44.0 +339,0.05497,0.0,5.19,0.0,0.515,5.985,45.4,4.8122,5.0,224.0,20.2,396.9,9.74,19.0 +2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7 +390,6.96215,0.0,18.1,0.0,0.7,5.713,97.0,1.9265,24.0,666.0,20.2,394.43,17.11,15.1 +174,0.08447,0.0,4.05,0.0,0.51,5.859,68.7,2.7019,5.0,296.0,16.6,393.23,9.64,22.6 +337,0.03041,0.0,5.19,0.0,0.515,5.895,59.6,5.615,5.0,224.0,20.2,394.81,10.56,18.5 +9,0.17004,12.5,7.87,0.0,0.524,6.004,85.9,6.5921,5.0,311.0,15.2,386.71,17.1,18.9 
+464,7.83932,0.0,18.1,0.0,0.655,6.209,65.4,2.9634,24.0,666.0,20.2,396.9,13.22,21.4 +381,15.8744,0.0,18.1,0.0,0.671,6.545,99.1,1.5192,24.0,666.0,20.2,396.9,21.08,10.9 +200,0.01778,95.0,1.47,0.0,0.403,7.135,13.9,7.6534,3.0,402.0,17.0,384.3,4.45,32.9 +150,1.6566,0.0,19.58,0.0,0.871,6.122,97.3,1.618,5.0,403.0,14.7,372.8,14.1,21.5 +181,0.06888,0.0,2.46,0.0,0.488,6.144,62.2,2.5979,3.0,193.0,17.8,396.9,9.45,36.2 +351,0.0795,60.0,1.69,0.0,0.411,6.579,35.9,10.7103,4.0,411.0,18.3,370.78,5.49,24.1 +450,6.71772,0.0,18.1,0.0,0.713,6.749,92.6,2.3236,24.0,666.0,20.2,0.32,17.44,13.4 +423,7.05042,0.0,18.1,0.0,0.614,6.103,85.1,2.0218,24.0,666.0,20.2,2.52,23.29,13.4 +303,0.1,34.0,6.09,0.0,0.433,6.982,17.7,5.4917,7.0,329.0,16.1,390.43,4.86,33.1 +16,1.05393,0.0,8.14,0.0,0.538,5.935,29.3,4.4986,4.0,307.0,21.0,386.85,6.58,23.1 +455,4.75237,0.0,18.1,0.0,0.713,6.525,86.5,2.4358,24.0,666.0,20.2,50.92,18.13,14.1 +329,0.06724,0.0,3.24,0.0,0.46,6.333,17.2,5.2146,4.0,430.0,16.9,375.21,7.34,22.6 +334,0.03738,0.0,5.19,0.0,0.515,6.31,38.5,6.4584,5.0,224.0,20.2,389.4,6.75,20.7 +387,22.5971,0.0,18.1,0.0,0.7,5.0,89.5,1.5184,24.0,666.0,20.2,396.9,31.99,7.4 +312,0.26169,0.0,9.9,0.0,0.544,6.023,90.4,2.834,4.0,304.0,18.4,396.3,11.72,19.4 +271,0.16211,20.0,6.96,0.0,0.464,6.24,16.3,4.429,3.0,223.0,18.6,396.9,6.59,25.2 +308,0.49298,0.0,9.9,0.0,0.544,6.635,82.5,3.3175,4.0,304.0,18.4,396.9,4.54,22.8 +396,5.87205,0.0,18.1,0.0,0.693,6.405,96.0,1.6768,24.0,666.0,20.2,396.9,19.37,12.5 +17,0.7842,0.0,8.14,0.0,0.538,5.99,81.7,4.2579,4.0,307.0,21.0,386.75,14.67,17.5 +104,0.1396,0.0,8.56,0.0,0.52,6.167,90.0,2.421,5.0,384.0,20.9,392.69,12.33,20.1 +140,0.2909,0.0,21.89,0.0,0.624,6.174,93.6,1.6119,4.0,437.0,21.2,388.08,24.16,14.0 +281,0.03705,20.0,3.33,0.0,0.4429,6.968,37.2,5.2447,5.0,216.0,14.9,392.23,4.59,35.4 +207,0.25199,0.0,10.59,0.0,0.489,5.783,72.7,4.3549,4.0,277.0,18.6,389.43,18.06,22.5 +365,4.55587,0.0,18.1,0.0,0.718,3.561,87.9,1.6132,24.0,666.0,20.2,354.7,7.12,27.5 
+503,0.06076,0.0,11.93,0.0,0.573,6.976,91.0,2.1675,1.0,273.0,21.0,396.9,5.64,23.9 +361,3.83684,0.0,18.1,0.0,0.77,6.251,91.1,2.2955,24.0,666.0,20.2,350.65,14.19,19.9 +491,0.10574,0.0,27.74,0.0,0.609,5.983,98.8,1.8681,4.0,711.0,20.1,390.11,18.07,13.6 +88,0.0566,0.0,3.41,0.0,0.489,7.007,86.3,3.4217,2.0,270.0,17.8,396.9,5.5,23.6 +332,0.03466,35.0,6.06,0.0,0.4379,6.031,23.3,6.6407,1.0,304.0,16.9,362.25,7.83,19.4 +192,0.08664,45.0,3.44,0.0,0.437,7.178,26.3,6.4798,5.0,398.0,15.2,390.49,2.87,36.4 +81,0.04462,25.0,4.86,0.0,0.426,6.619,70.4,5.4007,4.0,281.0,19.0,395.63,7.22,23.9 +197,0.04666,80.0,1.52,0.0,0.404,7.107,36.6,7.309,2.0,329.0,12.6,354.31,8.61,30.3 +4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2 +57,0.01432,100.0,1.32,0.0,0.411,6.816,40.5,8.3248,5.0,256.0,15.1,392.9,3.95,31.6 +22,1.23247,0.0,8.14,0.0,0.538,6.142,91.7,3.9769,4.0,307.0,21.0,396.9,18.72,15.2 +139,0.54452,0.0,21.89,0.0,0.624,6.151,97.9,1.6687,4.0,437.0,21.2,396.9,18.46,17.8 +275,0.09604,40.0,6.41,0.0,0.447,6.854,42.8,4.2673,4.0,254.0,17.6,396.9,2.98,32.0 +301,0.03537,34.0,6.09,0.0,0.433,6.59,40.4,5.4917,7.0,329.0,16.1,395.75,9.5,22.0 +163,1.51902,0.0,19.58,1.0,0.605,8.375,93.9,2.162,5.0,403.0,14.7,388.45,3.32,50.0 +235,0.33045,0.0,6.2,0.0,0.507,6.086,61.5,3.6519,8.0,307.0,17.4,376.75,10.88,24.0 +347,0.0187,85.0,4.15,0.0,0.429,6.516,27.7,8.5353,4.0,351.0,17.9,392.43,6.36,23.1 +411,14.0507,0.0,18.1,0.0,0.597,6.657,100.0,1.5275,24.0,666.0,20.2,35.05,21.22,17.2 +428,7.36711,0.0,18.1,0.0,0.679,6.193,78.1,1.9356,24.0,666.0,20.2,96.73,21.52,11.0 +127,0.25915,0.0,21.89,0.0,0.624,5.693,96.0,1.7883,4.0,437.0,21.2,392.11,17.19,16.2 +48,0.25387,0.0,6.91,0.0,0.448,5.399,95.3,5.87,3.0,233.0,17.9,396.9,30.81,14.4 +370,6.53876,0.0,18.1,1.0,0.631,7.016,97.5,1.2024,24.0,666.0,20.2,392.05,2.96,50.0 +417,25.9406,0.0,18.1,0.0,0.679,5.304,89.1,1.6475,24.0,666.0,20.2,127.36,26.64,10.4 +58,0.15445,25.0,5.13,0.0,0.453,6.145,29.2,7.8148,8.0,284.0,19.7,390.68,6.86,23.3 
+23,0.98843,0.0,8.14,0.0,0.538,5.813,100.0,4.0952,4.0,307.0,21.0,394.54,19.88,14.5 +232,0.57529,0.0,6.2,0.0,0.507,8.337,73.3,3.8384,8.0,307.0,17.4,385.91,2.47,41.7 +73,0.19539,0.0,10.81,0.0,0.413,6.245,6.2,5.2873,4.0,305.0,19.2,377.17,7.54,23.4 +426,12.2472,0.0,18.1,0.0,0.584,5.837,59.7,1.9976,24.0,666.0,20.2,24.65,15.69,10.2 +120,0.06899,0.0,25.65,0.0,0.581,5.87,69.7,2.2577,2.0,188.0,19.1,389.15,14.37,22.0 +407,11.9511,0.0,18.1,0.0,0.659,5.608,100.0,1.2852,24.0,666.0,20.2,332.09,12.13,27.9 +326,0.30347,0.0,7.38,0.0,0.493,6.312,28.9,5.4159,5.0,287.0,19.6,396.9,6.15,23.0 +268,0.5405,20.0,3.97,0.0,0.575,7.47,52.6,2.872,5.0,264.0,13.0,390.3,3.16,43.5 +245,0.19133,22.0,5.86,0.0,0.431,5.605,70.2,7.9549,7.0,330.0,19.1,389.13,18.46,18.5 +76,0.10153,0.0,12.83,0.0,0.437,6.279,74.5,4.0522,5.0,398.0,18.7,373.66,11.97,20.0 +110,0.10793,0.0,8.56,0.0,0.52,6.195,54.4,2.7778,5.0,384.0,20.9,393.49,13.0,21.7 +13,0.62976,0.0,8.14,0.0,0.538,5.949,61.8,4.7075,4.0,307.0,21.0,396.9,8.26,20.4 +316,0.31827,0.0,9.9,0.0,0.544,5.914,83.2,3.9986,4.0,304.0,18.4,390.7,18.33,17.8 +165,2.924,0.0,19.58,0.0,0.605,6.101,93.0,2.2834,5.0,403.0,14.7,240.16,9.81,25.0 +444,12.8023,0.0,18.1,0.0,0.74,5.854,96.6,1.8956,24.0,666.0,20.2,240.52,23.79,10.8 +324,0.34109,0.0,7.38,0.0,0.493,6.415,40.1,4.7211,5.0,287.0,19.6,396.9,6.12,25.0 +251,0.21409,22.0,5.86,0.0,0.431,6.438,8.9,7.3967,7.0,330.0,19.1,377.07,3.59,24.8 +116,0.13158,0.0,10.01,0.0,0.547,6.176,72.5,2.7301,6.0,432.0,17.8,393.3,12.04,21.2 +342,0.02498,0.0,1.89,0.0,0.518,6.54,59.7,6.2669,1.0,422.0,15.9,389.96,8.65,16.5 +72,0.09164,0.0,10.81,0.0,0.413,6.065,7.8,5.2873,4.0,305.0,19.2,390.91,5.52,22.8 +297,0.14103,0.0,13.92,0.0,0.437,5.79,58.0,6.32,4.0,289.0,16.0,396.9,15.84,20.3 +380,88.9762,0.0,18.1,0.0,0.671,6.968,91.9,1.4165,24.0,666.0,20.2,396.9,17.21,10.4 +279,0.21038,20.0,3.33,0.0,0.4429,6.812,32.2,4.1007,5.0,216.0,14.9,396.9,4.85,35.1 +371,9.2323,0.0,18.1,0.0,0.631,6.216,100.0,1.1691,24.0,666.0,20.2,366.15,9.53,50.0 
+368,4.89822,0.0,18.1,0.0,0.631,4.97,100.0,1.3325,24.0,666.0,20.2,375.52,3.26,50.0 +300,0.04417,70.0,2.24,0.0,0.4,6.871,47.4,7.8278,5.0,358.0,14.8,390.86,6.07,24.8 +298,0.06466,70.0,2.24,0.0,0.4,6.345,20.1,7.8278,5.0,358.0,14.8,368.24,4.97,22.5 +306,0.07503,33.0,2.18,0.0,0.472,7.42,71.9,3.0992,7.0,222.0,18.4,396.9,6.47,33.4 +227,0.41238,0.0,6.2,0.0,0.504,7.163,79.9,3.2157,8.0,307.0,17.4,372.08,6.36,31.6 +208,0.13587,0.0,10.59,1.0,0.489,6.064,59.1,4.2392,4.0,277.0,18.6,381.32,14.66,24.4 +293,0.08265,0.0,13.92,0.0,0.437,6.127,18.4,5.5027,4.0,289.0,16.0,396.9,8.58,23.9 +46,0.18836,0.0,6.91,0.0,0.448,5.786,33.3,5.1004,3.0,233.0,17.9,396.9,14.15,20.0 +412,18.811,0.0,18.1,0.0,0.597,4.628,100.0,1.5539,24.0,666.0,20.2,28.79,34.37,17.9 +5,0.02985,0.0,2.18,0.0,0.458,6.43,58.7,6.0622,3.0,222.0,18.7,394.12,5.21,28.7 +144,2.77974,0.0,19.58,0.0,0.871,4.903,97.8,1.3459,5.0,403.0,14.7,396.9,29.29,11.8 +318,0.40202,0.0,9.9,0.0,0.544,6.382,67.2,3.5325,4.0,304.0,18.4,395.21,10.36,23.1 diff --git a/Libraries/oneDAL/daal4py_Distributed_LinearRegression/models/store_models_in_this_folder.txt b/Libraries/oneDAL/daal4py_Distributed_LinearRegression/models/store_models_in_this_folder.txt new file mode 100755 index 0000000000..e69de29bb2 diff --git a/Libraries/oneDAL/daal4py_Distributed_LinearRegression/results/store_results_in_this_folder.txt b/Libraries/oneDAL/daal4py_Distributed_LinearRegression/results/store_results_in_this_folder.txt new file mode 100755 index 0000000000..e69de29bb2 diff --git a/Libraries/oneDAL/daal4py_Distributed_LinearRegression/sample.json b/Libraries/oneDAL/daal4py_Distributed_LinearRegression/sample.json new file mode 100755 index 0000000000..999b8a7180 --- /dev/null +++ b/Libraries/oneDAL/daal4py_Distributed_LinearRegression/sample.json @@ -0,0 +1,22 @@ +{ + "guid": "ED2952EA-04CB-4353-9FE6-80E0F7DCA098", + "name": "daal4py Distributed Linear Regression", + "categories": ["Toolkit/Intel® AI Analytics Toolkit/oneDAL"], + "description": "This sample code shows how 
to train and predict with a distributed linear regression model with the Intel Distribution of Python using the python API package daal4py for oneDAL", + "builder": ["cli"], + "languages": [{"python":{}}], + "dependencies": ["oneDAL"], + "os":["linux"], + "targetDevice": ["CPU"], + "ciTests": { + "linux": [ + { + "env": ["source /opt/intel/oneapi/setvars.sh --force", "source activate base"], + "id": "d4p_Linear_Regression_Dist", + "steps": [ + "mpirun -n 4 python ./daal4py_Distributed_LinearRegression.py" + ] + } + ] +} +} diff --git a/Libraries/oneDAL/daal4py_Getting_Started/Jupyter_Run.jpg b/Libraries/oneDAL/daal4py_Getting_Started/Jupyter_Run.jpg new file mode 100755 index 0000000000000000000000000000000000000000..4f54045950e68e5f99a90c513070114d9a1fe0ed GIT binary patch literal 21870 zcmeHt2UJu`m+m2F5d_IWg5;c`$yr2#BngtCTXJYX5St(6h)#S zP0q2&IW({R=X&oybMKlrcV^yOv({L>PM=+M_Nm(Y+f}>vsjAEGmovcCTWXqW00ssI zV1@nxm$PJqs(ug$0MOP3xB&pb2QZ&P0BkgcfzDX1(F1tsI~zKG6MX^nYv@1xAJmV8 zPmKPP`$7Hd@*gF?(k}#lA@B=e_n-Ea2RWg}^Ta zej)I`2mxV1VL@p@NohfGW+72&Q88&D3E;2F06-n^1nkj2SM=o!cmuWo7~ln%fjfXT zItNDoyP|VH0+h>HA`Y;(x4X1}fU6h3wVj)dJ-@A+i-4cCyMPeCpa1}p_j9+lb+Y$n zwy}4FxXN;D)i-l6L+oTZjK#DCwcVBN!4M69Py4(6cMNR(oouD-IOOF>2tj_*elG4V z_TJXaelE_gUebQDEdQuo8cqKw7GRMBdD=Nh>#L~!#(~bsviv5Cudgq^uL!@Jr=x(7 zl$4Z!ps;|jFdv$O&+DP9x3wRis~78U6{y&I*?L0Uy&-O{%s(o$wsC`c%d$YB5IboH zYX>nKJ3BEx8*33eJ|Q7HYd&i`5g|T15fM8HF(FA|2O%4l-u(|Ox0wG6z3Y#l*WMK!>H4BW+RJf3 z8NkEA!NtMG!^OeH$HT)XBqJguARwe9y-G|*O-V~bO-W5f$H;Mmj)9$likgL=g`Ja& zhnI(zSwKX9TbP5Jhx+{a0A%<;0A>~z1`B{mhJi(f zaoG+qp%)u*&|928b~%4b7?@btIJkKD1cXFrhRUk|CI%K3CN>rh4mNtP6C)7)9Ka^S zA-^G{hXaV4F?E#xsO~@H!PW4K4dk4o)s^Q894|NhxV% z6;(BL4b5A34GfKpP41c6+Sxlef*}vQyrDk6eh>XapM-@!eHIZJpYSpVL!ZWr@;VZ8+?H@jL5#Ig48yb27Go4Q~Jv!oAUE28ieV}dk z>_BYS*b;U^C(YkzU)lyr!c{7}Z4DC=-<6dux|P4GXE8obE%3!tBf5R3WPnq)dt<87 zti$y5*s@K+zdZ@*Sv4*~UNtzLW?U*i)eBYK<3V(>W%nQhQudAbv*$#I&Q&v4Z;qu^ z!epF-w^?@kC0OZ~C372h2*-}AwflSPWh^^*LtKst${eY?qzc)_vT6xH9#HYd0&tyN 
zsMy6pRItgzb@~9p(Aqb#$tA$T8(L^ajOn_@2wQ??3W>40#i*%M50NcRPYtz|f}|wM zsvplx)vJ0+sG?$<`#8-Ok{M4CFKO81;K+>KS8_)l%Y;#Qv_a8vj+8{z{@s@X(bg2n zElQj=b{cq9fX_$c;5R`{0Wi%&iP74@z;@{#f93E4J&UU_ZSjqJgc zOgfKO?gJ_!!7vGSxJKbP?m+T*!dC*Z%Dm%CppCYq_a3ia z_?r~*f{?>~ovmTx3m8^*R>g)`ugu->T{D>%tG1w|Blft2o;bPf8$qity!u1qQj)cX zkz&O+$MuKoMNBO^*~68hQv0Jq!-1$+Jq`Oho`mD%;s=aO+k9WMbIb$NSNZwk?7o)< z?PZmjULDfyRq!{>7;ru1jE6aOWj$<%Y*x@5 zo)Mt1SQnuy)>5c%2&zshv`6?V?&f2?^y|9wfB~x`r73xPz5TA=hmHGA_vuCW+CQ?# z_FjBX5~)<6FUp_s$ql62ZT=L4$X#};**E2htKJFB2yc+j9{2lfDB|RGFDD^KyqC9R zTY={5UakzX*buk>eJ>@zGXTjN!jRnuso$EZYz`8y7yT8I5*eK&vWOINHOQ| zd`&oY?o|;L{rx;RIHS1yiD51aJG zyWf&a(I3#qaQ8A=Q5d?Y>s1bVpSV>LJW2&C@SkZ}U5g32HvBCG;LI=HEZ&@l@NvX= z8-qWq%m&%B-}96S!DlU}Of5dn)7ai)S*z%Y8i400JMp5-okrAmzg01#_N)T>aT_VV zFKF7{V}W|R)7-nGjMX1{2kk|QZSxUpS~B;vmWUosP3gRh{=n6CMO{+%)bK}h@oNP9 zxm7WK2tPwpjl@GsAE|PnF`gOA+!AXCK!|eUEkOQ#}MoWWOUaY{#gM zR|)8-Qc@Tj?e&TZ_R+_Wm)(n_zaI618kZpgn=yb)fqdX+6$V>SGg-Mg1X1lh`#SzI zmd>x{g;ZAJq37H>&5FG`PEZ_Vj)YPC`~KS@8VkxV*3=Z4!T>E*3SEqU=fbj0ycq?$ z&7h-1B3<(eV~6D1;&iy{rs(Hq6AueGPHQ#4(qU!Jr*r%5>s+4{?^U+Y$lP@4`l4P% z0h69j^pAT~lKMEQ`W&lD(*JI(aA9&n<0Zg%r_x?MZouUFZC7Bw*W(hn-n@{sb_pDI zT>|RtP+Ff1+}93OBP~cg-Y=eCyNa$&q_pn1J*||d(8PV^%4gO8Dku4oP`Z9`TIjLT zcEdq0R6S+taG9<4?vamhgNNiGNOhLvG1A1u4(&q{MuDifF0qvkWiBJ2fSNf5&K;&pfj(@8F_@pw>kT7^)Zw+m zs7P!qKeU0iook_T_h)HNw5~h}jZ3c)%bk_52bgC;u#&An!ps)i?g#B%_qrtwn@J zl<>^8m~Z8?ZdWECOwP^UHjVa7hRtP%R|F%0kDZ}sh4RwV_Qv`pnJ*8p?P!5L7;RwE zvPD1W){$>fCY)TEZ!?1@Te&{b(>N_JvzUq4^w?;x{rSV}_3FCDi}xgTc26)p%35K)g&Bu^tsbLg9^SxCFig_K|8Fv>)5=++G_seKhc}dN!=GqO;SD%U6&W zxoev>&pJF(m07ZSpJ)3L2!oRn6{YFZjnjob;2UGQ6LZtZ=LOkW^GiyaPUM6g?YE(?Sn(XKa?K@+MALN4F*IKwcVvK4)?P1LuXv%-AKYpjX z^T`AJJ}(n@PEgh6k@E4qnLv@*(r2acOMv*h*`yg{NQ=0Dk%syPb4;L9Km0k2Oj-1) z zXF9C8JFn2`Q)$DlJQ$;oHFc1-bt2^2JJI>FoVVRkFdUJ7_tRrrov7PKscypw-G~`POC%kaFN$L0EKE zf!hOTfhBzw9@-BLGCM)7G041M_s;79WdV0t)``>Mwx=%YtPTM|JwvTyA1z>WS|)KPTN7j<4e7@mEQg z&0WjqEN~tZjQLs2_#G5+Zp@@=309{%v3aW3j>k|jzgQ0MZj~za%zCyGkE(V%plIA$ 
z|0K}gva@xb5K9jG&b@pIsEXNG`mHM@%wBAyh}S#9JRo^RT8wG=>PsJM2+coi3g!0+ zz>zeP-6d<0M$>&L(vzggL5tGk@$*=!if>DSL}pRz1EyoD`3Sh{DRSWuNv9egs>iHJ zD`a^Px=6C?m5@eqWeIT!blfK%7U1W<%Ilw4-0jb4Pl|Jjb5i-#grJ7Y;vQvWMfx>AESi>^AZUvu zVxrD50-8ZA{+j-9#8-M@X3C6-71_KZ;$X6%TvO0Fz|5RhdpA1gRGVV%#IO-Y*~O-!4c1rY>GQzj)i?1HZAvXQ}a zu5)Jt@c_+yzd`=lo9dP1nbCtw0MJs0$)F@qO?y3-rb4UI&;6X*sgAazS-9L516FT} zkA@opcICU~p*L4v28qWe2CQkg&JUMBJqp(x=Hw~WZRynxb#7QWM4QTU$l$k?f5Hy& zW5sM#iY%?l9lr$Pb#{mP`x3{BwouxST~T>B+fxkueLD4UtrNM?>(!5&9g}tACL~f8 zFryYSCTF+R12674>2%eV#JOcoKvJ5YnJGHe`;G~6ztfAN{+6cUAaz#sbU6xIQW&Xk z-t%STFd;eZE0byMuJI} z#omF91{H#+syC;GTWn;ZM7sKyuK;08$U|A|iip7j_`PXL=Rp(?DZdLaH$h<6brgT-MJ)!N>4 zLIrU~4s6|-pB5^t7QH%gtDetx3<~gZp}o zz5?1WBSbe06|#f~snT_?s5}Hh`{VHsQi|R|`d3XK`55IkJo~_+*0nh*g)b<#7K2wt zd4+G<0(p7~q@k3Xrbpd&&vm%JJ+hEZN-UN|RkJtt9zAh!743H=6s_2J)7>BR_!8i} zGvxu_$Bf90)gEckhrwNoalYo}HDxk4y#N`x5GyXyxJ{O96vTN{dWDD+`(IGzWQf)_ z7av|QdD`YkJ!)1dJ!Vw)C=~JYH_{>FUh!}eW)JH{m3lZaAe-a!Aj4OaG39o)}pQPL@*%YZ|kik z7huTmeKGF_=+p6-0|-~_^mn9r;Px+r)}u;mx>@D*bRi?xPmNM>qEiw>CNY^g*n%h0 z7Y;aTbHXnH%k|S;-rv8tetvoVyx(qbp4K^=xdfuAlh4pT`w`R5TfBikO4XON8k4zp z-vkb=qQ8grMPq%FBI{$!z@0k9(r@?JQM<5dj&tzu1g1YXsj%f1$HgwJGxjGD#F6!j z^%nxa5cp3a@RndDD-z(Yy#y>c&e~20%MJTZSLLUex^*$`jhc4F6{J%0@|P@`sPrzp zYB6LBKAH>&H5Ch$OFo>Y(&C+4y>VTr@DuT%3X$8Rs7#uf7|t~Yq68-`XI=9#i;kN1 zT04C#jWeYMA?(mktst@ycu~R#uPdE}L--VgmB||8V}+yhhFSVCea1*;S$RWjyHLZ* zXK(%mh=Ug1=T2WM(Z$1;5^G* z+-|^SWsPx?{GL5M(Ug!oVP+;eg3VP44(8$FS9&1`bi(9LvJ0<7rzI9oNW{UD`Wg)p z!tQD`ckq`d48BFo2B?%*)Caq0R}g)QVAND-sYNJuQoD4q$|Y7SKnc<7c-&@dx)b-O zOvAk6zm&di0PyX*S<^FS9>@w9#TrIwl2m@tgFCT+29UFRb|FS) za1}jmgr1_Bua#ugguxr&9|9(&2~0ZNRRlY1M%J*pDy!5?$}GlB_7|?j92C8UB$%#| zs|!np>L|NC_O3(e0ivg_avy^eD{t zBXZ@~AJ!0BUDxWm*mB1VUerbe_2q%|UAsvlc_N1P9pLc6hE%#&j27bF^-kVnTsGH$ zgxFN6&do%6A7fjMhU#*-Vdz!Sn3P9VC@{%h!IQEHpQ!kn3CaDu(bG*lsP6b=tMAf9 z+R=$HbRSQ|o>y-3Rj%CN*#Cf16JhA;JK4`Qn{IqujiWOryzbq ze^>TrfMZOpZ2h;jj04jQA|e#ueox?adXiY4duS zH>y4j`NI_I9@68cpFu&15kzJ8b1+ik3)jGd5QzxdB!}RSg;DW7>0DYWl55?@_d+yz 
zv%{tAlXiuKvw~mEvu-s@V!iQY+wTP%b7hT9t!O>4yEb_<6C1)CedRtvw?sOQoqGt$va`%A3!e)ZfAF20`Fo^ zVPSx9BrNU{&}Q1d1PDQ~r{GH;)Be49Z02fm%1F*Lwj|Cp)|ji0 zH&3XoH8;5J*(rV8lu7j)Y9Xa^5$u!IS@-kQ4ID?{^%knU{7@nj*GXSq@>_))vcY0~ z_$cNFc@R`@KTFq_g^NPV7JzE6D5;qgUIAhAUr2CD_|lp_cg|BL7tz(-)H3pDFS(`Z zLVhMK!?0~&luS>a^Q>>{7)E!>KrKZ>Va$WJOtVU;{Ft$SN#gJ&kZCDxyDddYkD}hW zpG=diuyeb*Ug~v>GqOz#KP>l8MLe@O4FbCN0IQxm$Gk>BC{#7>kvJM=iRX7~x}y4PsyoFIA) zDfE7zoQ~t}&8&O+DiHr$E(+j3 z$wXO#ywOzU6p%B0?dq+CsF@JyfZ)4#&wuNsdcVmMM58MIvpX~XCJV;PUu^#v-~T~C zC@}c`5pZeTNS|BiBiAreC~8z}`JP9AmSD6-1UopgJQ#+p5Ghd8UILLIzS-w5Lrr$DuJ~<*>Da&U|YWFzBy*9^tYvO9YadrtC=lcy7yvr+Y_OB<3~E8c*_0 zcp{|uxMlUo-+vlZZYqjD>f%Z^JXtl@le3Q(3=l+YL8vEl z=(7)^jg`4n@?2Yb??MT^ZftT1?2Bby0t>997oG_xxtOG72w2~H`J)Y^`-z3eFtes% z-u=Ftko~boskvuTmVCSDU=uHMH6@`PaX>USd1zrNo>lvpD)7=9esHRa=UB(4QKBm;n z5VCk7eTm1!I$8@9&s~@mlatVIApHozU+n5QIQ8-HY6o%BBAaeoe)pF44xLG9I25ah zr}9NLOj6Cg!()7Uu%?j&>acSqU(4dNU^q^luoe+{-s6@|%DatfnnX!b^z#(ZD^DNu zzW?gsg3*O{WRqzoV_AKx+2JWT0-w=jJ_+rJo~t-1V>jCaw~MRXh1zWM=rNF;BHegm z9`XyU5*MDaCyjt{6Y9&5^VHfdHEV=W!Xl<72ZjO`TS5@9kPv)N`c7@$nv1Lbfc?;1 zUC@LgAL1i+Bw^^sTITcl5FCL>Br>mYkWRO%uBI{)!qC)9{DH!aH%#)`bq0;TMhD1a z#^?8&;NS9gwZo!@lpSsPQn@0l>9-^qX$r)3= z6o?=IZ^R^R4o>k%-B<=~a*>xB=oy0uE%D=&&Iw(y$h<%E9-+T>j#^7iXchG1`;EGvr8*Y2nuE+IZ z(e&k0E@t4Rb|7*;kFQ2wBxfrZ$S&?98JAu^(_0`uyW%k-gLHy=o#IjQveuly;>Lbx~ z*=R8a+csK;5!0`yC+Ty7?(8N*?swNjVq}YQ6>ww0KntT9-Z_C2<(7e4^-|v*pUjXb zqXRM6>~n}fYW?G90?i-x$7$RI=Bq;xl#^`H)E{H-b{N+*^&_Et(jTo|`Nd?H28sSFF|7mFDQG5w_Rn+f5a!AYmaY!7mql= zG5`K_4DQZ|Hp~Lnz2|Q^x~*QmHku+AK`gv>72tTOOy1@&BvXq$!_8&;F=41uLaTWI z8o?3ML#*Xi-5WmlX(kTK5i-V7WHmE0kB2u{(coMyAo0}bBRI20x^5#L9Q7=(JGC%e zA0}jem<@&5%0J5;5ik_Xx2guG_lSqlcCb&_>D8t(!-G#G=gU876{8&;)xiph=?JP4 zyp=a7I{tFpD-ZFdDPG~dyq<|~2FW9)K(2Tf!bxiM5W(z^{e)iqS)fy>-TpK zuRV}{#Ko65gu~g|9?Wn@P@|P+!aqhjmVkzpwlQ@=2V~^`ybYt~buLu<*|=6Y??m~U zCQ^5`(|5@X)^Z7m_?y%n2L#a6oQl^U?h9_p(R?4bPUs`cgtAt{plk1?ecWo;(4!emj-u_%Y`b5TyDEWgPou$Mjd8S!S z5S3L22CQnssEe6;BJPy5r>uZd4YWNt?Xfyr)W@V}y~DVd6ww?Zm$Lco=nl%r;l*+7 
z_QV?!HGy-g&zC^Eu@7%CKDKFF=p}HwN2ZeEYS1a6o! zJv|8IXnQQU17hT8s2V!pDVm0>dOT!~(s^Kj|FPccY7HyR%u;r7QRKxHh6UxelW zKd+ll=Lf+EMb7scbm5ZnTr+ze5MH5H7;PJECoKo>RV#YRu01jI$hfuLpsYL6aC7js z7{M!Z^M-iA#BVVge8cv!tf3a@$h2FH>2#4}@76p1oU3Ew6Fi?lTCA_lU9A&>+DZEj zGRBf%dq}2>Jp-uLDtwx7&9|zBrg*+4PFPr&=&HtkVXSL9V}*q#RIA`nVN29lf`qf4 zMR=OoMo9wuJe0b->$Wzsh@sRjI%DIr{K;V~>LmM2xDOSWeF-!Z-;|`MXtdg&40LYcz2aH=eB0QiPVJhg3fdp;IR&_gRy9b02= zqP_zn^g5(1dMX&nd-v}h!ur?G^ikiQp2a^_J_r9!U?L4PXG7)|bhy<_6+Slxb$`zW zU64qe88K~Yo{;Wj|0LcLro_CSEw-^wH7*}?TU14qLWw;;#MIyfx8%Qv3lKo6A<#UC zKVNS9!!gn8JFF#YSEz|&c$9y>|9eH*R@Vbw-BgL8$&UMyuau4HXQi{kwb%V(s*UY^ ze!5x7k|YhgU*F6Fd87%fmSaJ_GxjGDbn^_FcYdLK3BYieT;XWj=fk#U@?n+MsD+Fd zmjJE&?-anliCp}UBDowfId5-hj2*9j+o0N+47uP*dELB%F?7+oZ%$S9qKNc}?fmKS zz)zwdJJ#2p%{kuOtWL0vNBV_&X1;Ut2k&vis-dP12xh#BuCppr&|KP2{(pLL`VWDw z?~Wn|s}a@+8wJu=Qj5)<$$tXjEkV;HI_8kqMaLYi_knrxWrSZ8y5Ws1+b?=>H^>RR3)^(=oEb0TZ3YY*E|xfs!sY$gj@TB zNm%)FOjz$9{C`k@^jx6a+K5>|#Ef6AQcM$=Mqg0fv`3opNeh91Yp#Og7=Nkzn!8ue zYkb!4BN>P9J&RN4bFvyGw~WTHo5gyv2Ls{@;@xT+Gpaq z0wd=Z_-V1fG@bDa2>G;X0OyB*8iV(DtJ+wwhH=+04)b6S+AcoH-$04}PGCrk7t1fN zF`XTAQw#7VImNr)z{tTSpN}h7-~XdO{RH=4VXCcFPd)r>xms6~jVFI$S6lthCd1>H zPaQY2aQujlHGw~?r6pahZlIA&Cp-xuLn@@Jz5y;6#rShH(eIDFg0x5QH)ZTDZ zz73~RqUP=`3C&7B_~bz)!q+YP{!>@hsARkX3U}aBEPU28}J{{_a745)Yyx zSnJBe#bUUS0~FSV77AGXgsLf0Tk~MMdzkIBk@UgwH7K>$f5NFBgZS_h#fyp0awIZS^axc_Rrs{ia#+`o5$zjPk|H@}~X9Ld-_AiX%J zr0n=|jYXY!pnJ*ShF&m8kRz`w=PyJJ=-{)J&S!h|3Xs{+{BM!=&(j!{2jQf^O_~(BXiFz!>6$;)JH0|L<>@ z#b1+c{k(h~-K5Y9^W{7+A(P~Epx@Q*nyH!GYxicKK&kU^%NCDQV=Yycf{&Ku(%`ur z3i&YcMw%XpCkGw6RVbk*Lk7^e-^BcSur?#b&Xvb*G?SE@IB%>_(2)VX4eRBj<~?Z` ze1{1g^0uJeYxkR1+u92?RFC;aCSXG}NdoA-g4kfu=dwyXW2U>ObJyx58fzgw5=TKG yhq_x_g`z`^5lQyX<2{x?1jbv&D>+j*Q^1Y#(r>n^wQ8vX|FdqYd=wa$WB&(DxXMHT literal 0 HcmV?d00001 diff --git a/Libraries/oneDAL/daal4py_Getting_Started/Jupyter_Save_Py.jpg b/Libraries/oneDAL/daal4py_Getting_Started/Jupyter_Save_Py.jpg new file mode 100755 index 
0000000000000000000000000000000000000000..f4248cddbb8d7da54d2846f080cbe05932804192 GIT binary patch literal 34787 zcmeFYbyywCwlCVaLm;>ZChqPY++7obyE_CAkl+$rg9InR-Q696I|R4j_GYry%HH?f zecw6z-t)cp$6Y0>GY21S!Dl}M zl&3W$DsyLNdtN3cTPH>XV>?3=Mk6~LCU*mSCKg6!CV*ea-QK{+%EXz}(8SEbR)Ffb zv6YI{!dQSxlS7_a-d@ba+(OFJ(L~i#LCwh1%8199N=Og|k>8!y-NxR=#Myw<-NxG1 ziPv3#>=$)jaQWwDCNe>OM`Kf7m6sBK)&QRgko_r(o0}V>8ylmYqZtzm4-XF$Gb zw-ETZD*x?Wf5Y`}A@FZi{@c6$hU?!#;NPnJf8bsJ2zpIy!I7>TIHY}A0K@<|7#LU> zXgF9HSa>)%ctmt0L<9sxEVO6H=(t$;c(_=&I0VF0F9?Vzh;VSp7|AH8Y3S(b@JX52 zm}pt4Xz6Hw5`ln&het#}#6&{Eq$R{9r2V(Ir#1i`9zq);6AFS1fJBFYLWg+j0!YBg zMi}rE=g(QrKQ{1y&wRn zKh*lI*?-Ur9jq55G&B@6+)uqAAl<+_6go7_3l>-m5oI_7hv#Ih{_vQhF`vrY5y;t8 zPOuCe#}TnX>}wRKKUMps*?fd4Jcek=A5z2*UACi+w@z!Si$ zLk-!Km=MDs#?V3j@>4GI^*oxWW#Dy`+#UObjvi9&2R9F9&KirjgRVI*Z z-O+2UF=N*CY@oOW56iE; zSKWFH&d!fF?q=eJG$t$OmH@nUpq@=+t;>3Ht`%QmO=Xze(4!U}4q67!QKRBE9f@3X z>v$7XR=0(d3*Vk9y>7f(6Wuw+^BM)dF}S?LpJM-%y8&<#l4t6JYX9Fl#tp@5oON-tT7Pro|TR z33wfu<@yb-{{A~DYW~F+>X07%$QZ8@@9b~dulMjL7HCJdoogT$S?uDzAhi(M@3WO z>XE!h49THn+}kO9V}{3bJ6`4W=(08QnzE&#F+?@cI?)u#h5}oIjJ-Z)G-U|xbNlfK z-%M$5=*2Y$%s8jEV0P`eu0R{yAWJ_1vrV%YN=Q^5$?U~7@-Z`-s4p)7_em(-V#qhUGoze88r8&sX?*Hxn^`ieipptML^NOc;#;+X z@{8r#_@cfmoRF^!*2MW&?{d1rPbhXOU26YKV7@xpKUU8&aokHT4 zY}shcYvp0cd_5$gUo&F?@@FYLZ^x>IU3Gst-mLY1Pp2Xn>J7s z@ypYYZ`~~4-M3TiHhgQAcP}sJU-fZ01{pA7X0ywa3AMOp@Tn-Qx76^K{&Ya_Ft$(f z`jnzJr1VY384di$)u0|VBhZwmUFc!8fQ~M1=@B{IwufV1oi?&DZ11G~XEvwbL*Hfz zzhkU6Nv*a*?aLTs^Cmr0RosQ^jW4CTVLi9jUpq))#KXY9n|j&O#&z zhU=FbrDyLuSSbzkBiEdDtbsxeg7pgC_&yUU;2`q0W@1z;`(AR`H3@O`!BEuy2A>nk z=(oH(q^He6!y)qqp++4Bu1~=8MpC>dz(Zbh^4U!F-NVo1b}>;}EB1JN93(D_PE!4~Fry5d;8 zEk=`2QEyc;huj7$JK-Po1iZpjf)M8#QancF?|MARLiT=rwB^Fh;5;Yus(~#imetUn z@AAJI z(^xlND&PI%Lwkmy>_&Ei0F0GK`4jNg$GOGQ*Ifx#D3~4n(=HnFvaa5e%oUC}EN0&h z2+d$3RtSCuKJxC$T&WRtiu-Hv z_@-U$4<nR!-||ELoD*fFbCo-p!=m9nf{J>EEje~l!pMH#Lu^V)(uKA){T&eO~Y zc0>e^W89g4PL#Kj|5h(t`gMASt*~qIauiiy z= zkOtYYG4~vS_|A``{2v;#oCHTNz{QcOhYgruM1u%7+zRo8($(VH=8aQ^+}w?8rIJx9 
z$`QLNh`n1xx}7t?Fe(3R4H!ism_NR)NHrWL4{3LLcT;O1Y0#MFIhA=5z8gl*+KVI! zBGym%zKQiMPxA>NG8Dp}`YkVNEgOd&ibcw5E&fMJ{mOu6N93g8&wQ#W$^!G}7U?ff zY+~LHaVnlh>zlgX zh^2+su9NC@@nyl^^WRIdq9WPY^tIgvW{y<9{fjrOL4F{+)>f@^t;rcn^~(?|gRlU} zw=7hr`Qob&#CT-R{=Kn{>Tf$EuZ5KB{Jb<~e|F@bJzALX+#O9M79?$>eVIU!JhHPC zzIk<5e51aQiEN?iuvAjhM$oTe@k(*%l>}DEC)`rS-$1eWu3=1)*9C7cQ$~8ZK(aL* zE@mAX^2NjrTUaEZ@1|ID#t^>mrT6{tp3q?Ugy1`Qh##NfhfZ z!%!tAxXef$7L{O9t@f?DouK{Rz0kGqLBNAbDG%LT=$L&J)~_NbnxiEQzpoqd7I!h{ z>?u-^0-o;+%NA9@@xeD+Z&?`YMG$O{ z2*JS6vslyEz9}01?7K zyBCqnEGq@Y=!1mi#W~2l!Fa2ycN(vjW9VLXw3y2?+y*fUlVCCL}cDneLmY}**B zhr1FH3*mvLT=WV*7bfB&p3=~W`K)NfUa>R)GE??Lcp<&@1h9opXF+DmCcV}+(tI@* zO-Rfv)mLbRdM=u9td!!)lC%^?8|yBpt$dp{2D_{YGI8YLkEDrR6oEF_PGDCNhDmbD zh#%z&IUX3oc2;ENKwju)6n-rbg6Z4A!Kd3t!NM1>NY96X8A31N^RaE$L7d1u7h02x zd0uRL#I;Nq7Y~%-hq@iRyho7qGKOogt_;_PG%^RSS-e1}-#)szUY~PWnU*etfGEOr z4`1R_fi&_Lka?#)OU@fgsrFH)UVTqsy1KQd*$jKaj^*KqB~t3~&2;F%cvXGXE+f8~ z#JN<@%2m=6aL`e04s>haM-)a&RG7@GK2vHsf}%RX+t;u{aOO*5vb8)7|haYVQY;EPt9F0CZQZFWrZ1p&#g==!a1(&^}t&|Eee*@ttA!CDbb1usieDwBf zv{a~H;aO)i!dz8mnMW3CFy49{s)0>AVUELih_Z@01MPq;fi`(^XQhn3Jx?F0Z_ukO zU=>nPm`#17r6#fOY(>aiW&HHYZ(sH605efL)+h;bUrjVcQyAtTR4;i56-9#)J4EAU zf#CS2N7%OYIqzvL_lQq56Y+GvV%BnpO{SrIvKnq}EU=7<@MGhmEKZ@uOP&{(;?0QP zW?(d{blW9f@0u{Sd3Gp3{lD00PQ8yik`=>_eJ~*y#CaYDqtrQQig;5$mM0aq!#fCL zVNV=>Wd2|;Avr30NjG}4nR&Hjsrv-HDQGNi%r&t%pqWhVP;hq%FzKIx|3-C&T>eoVM}dC@m4=vSQq2ypHP5JI0| z{aysq)x}x(EArf}XFkS10fHNkcUZ4?x-@DU`X)^@&xz;C&`;04l5}S3IlXM5nm9bj zSYO{#>a}Gg7Az@{@bqDaFnXYS^zMDQq<#Wqo`6gTF!#_e9_z0-N0z1K_NfxTHWY4& zJ%bQRDu&XT)LQWOK)=(x%}hPMd~^nU^L$6^z;RkP$*%tSk=FAuraj&`jA zsbe?`!JCWA)tNL$`f~c1^uATrm_>Xb|1Uu?e>Z=f?FVN67cr(=&HEFQt8c|PT&Hk?ycy~Q$=OcH8pI%8Y*lRaNgb;2b7+9Ne=CBBkq_Ss^!WY$`yADk{Q*!VVfImcKm34nt4&)>E3bE`2YU_-AflYLS zi7(4^DA1S7n^}e$$l4o{Zo?y0g6;VQH>dQ&_|1%D2uCpq#y5E<3o9qM7&Wkd}Su$7vKRSuy}egdxR4Yp(GMumm3BM{y|If+ovCPcL(E_)ZPS-ckIAF zp+I67GTwSzsXPpm*M!Etnd?w!YPbWS{(zwThxpS3RuOzklNsl-^}6q8Nu@D^g2i9g z5j5rTbaKLvzNQ9#8A0M#gFJ8V{M|{ACZrfBR9hX23o@sAntgDvs^+~^MID7$F{6Vp 
zKHonidr6C7q3l!zLu!_w%@H>LM9LpQx;@Sy0 zJ&2noBv-x6=Y@U*pN&Z%TPDH5?Hu2W0S|O4vt8JUsQt{b!vP+ljERGb$vtbCjzYZ6 zmxDIrU*^e%!s=m9&(W0kB|2Cyd{MfKWgW;D`Q_;$LB@n#?X_^kkJHlk_=NnvHUJ5h5}aw0?A)9L`5x^3?#6{!~N&z|k|&>vbC3Meqk z{3sj^tYv0;tC+S>R{o6GY_xUjaYrzIa$J}&tpzGzOWpRE_+>=gAfmr(Q35S0Z(6Os z?J{AAH^TGa{`@xTk0Ahc1L5CKy}Uh?xiVmS**8naGY66Vp#(oi`~F}UI6LMBvUcY zp5*+%ijGo+^iKIU)+^S7G969V?n++cPQCX2uJz$U(dwE=vILX>r|$(UN?$DuzUAtLa-9TxF0$*;cPtdYRK_;1)0Uj9QSP2))!y# zf1;2j{x!pzOF1y$U@%k8|22`>9si!|d@hds7oCb;?vDjQ6~fl*&8dePIeCqb&VTX! zWPU_Mub=qfOma-}06oK4ovlAwBwlRJ6+OSwi|*fbdK{_M4+lpW0N+Pe`cO0r#`iqu zP-EpB9P?gH1KN|T^|h0+e0M?hbwDzOR0pJYNWP^dRZ1AKLjx=xQ9Tyid}-^>(i3o8 zbB&T#^(y67j1*x9P)Q_V`T#U`U8F`}AFC?$%nAz&*aE;ELz5+SRTV?d}Pf zCFLOy2%dR^+JKtlJB+n5kpRMD-SUMx7!ZmxXq_M(2#7;0AN5B-onR4uVOP_HTU9<~1kXCK z=AdG8bquyN|K6kX3DBFpQS*$5wbZfcPRlI@KF$z* z`#P}2J~dZ3l=Iu$cm|Q|@Nb&~#7*7c9+m_1xE`N*v`j;1TIs<&tcUxBIB`BR${&u* z@qZPPl(|fjPW%Uri0to**|yl*L|>3uDvE6I#7S$3`pIg@&+XqHCLh)cW^TxCQ(cAZ zNd9UB@P^v`Q)^(4rsWqIj}H%PH)Bu*pKpE^#(%XCM$JzB=cY~-Mt3UvLg%x$h+jF^ zb7gTUTk2wJF(zE?3^ZX=?V>h*_4trM$w3{Gxr8U+(04`6*AyH;R)fPbC0O6TZ~XrQ ziAL4E_owoQl|5R!gF)ZR*5jgBB<|A1Dz6F&9(IUip{2sn`#{x)P9?=FOQ$D*bm|GP zAA{L=eXlRV9@_7VLf&s@taS3etBFlAlXYe3<1ID{6rN}- z&#U%r5TfP$awWN?N9aS-6s3fNvAB(-%kv_I5?8ZMg>|_l^J-c)|9WI^Un6YjU5R6d z7}E(kC>%||jguK7OVaM22s{R@yG}dbH5RAZRhBj&K=8DyAheG05T(_+NHUP|z2fKO_1AU{JlJHrCxE!z zw@xTt`4nq$hL~WA@i5hTtZgKsmYdYmEyp0w&2VE*r?2FT4HDeQsu3K|er6dAnYK>P zCzW|DC|pQpmcW?XS#q28;s`;$o<_FCY-vp8(jiei=!jSmlBFJaA!SXQCiB|oG@MLC z%A*v}*x|o_s;sHi)M8kVFy^s;4;UTq%fjR}mzeUIlLeuvM?ePNXi||x zy{q{`&+8;*v}}yTx=LGABXZoSHtD{`uwx)^`<{@H_HdJUXXmwGRonOARKu_Q zNM^QV(OVnnC?($Ano*(^!P|T5xE+d)04u;nid2i}(AI4p7>$DIS89>3Gp8eN11&=y zBw8jsN(jzGSauX;2MJ?I3d70h{EEDPM&D2gjpMPGUf+k#KE-ZI$2+nFL6OKbKSbb7 zlQ*4j-;kYk*l`P9DC}#>S`AKp!22v4B**)#{tdJAkE`ND-le>fQ6wD3dX;w6wstog zhHvW`xn6m`*Ag4}M)OF8j}~u-9}W60i=o8dTUxnNrEY}`AT(`E3dIYzfZp$pEh6cL z#4NL$$Fjwn0a2ac*sRkwoLOVjmjYvm87b2EXV#7h!f;c#}A^vQ79PL|egb z-cwK;V9}!0RGK;>z;>f9{Z{IQG`6e?l{O+Y24rbaKhNcEluo5xNy(McC;yyaUHz(? 
zvQ9+q)u^5bu_d}Fl$Y$2mW*XQsl$IztO8Rkylm%R*?SA7$ZI4N&zN3JnHASJp6b4l zI4BYCcg_tbxa6s7=i-pROz-wAlOiT_7#RUVLYjQC#@l>KU;!&6I0s+55(oUODLv=9 z2@&x0Q8#fmPgJ#P6*Nlvi>x{^XgGYbG_F#N?1k_~`Vy+;#6IF}>$e0({+=+O)AV*X zlEgBW