diff --git a/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/CMakeLists.txt b/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/CMakeLists.txt new file mode 100644 index 0000000000..9dd05e922e --- /dev/null +++ b/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/CMakeLists.txt @@ -0,0 +1,27 @@ +# required cmake version +cmake_minimum_required(VERSION 3.5) + +# CMakeLists.txt for ISO2DFD_DPCPP project +project (iso2dfd_dpcpp) + +set(CMAKE_CXX_COMPILER "dpcpp") + +# Set default build type to RelWithDebInfo if not specified +if (NOT CMAKE_BUILD_TYPE) + message (STATUS "Default CMAKE_BUILD_TYPE not set using Release with Debug Info") + set (CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE + STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel" + FORCE) +endif() + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -fsycl -std=c++17") + +set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lOpenCL -lsycl") + +add_executable (iso2dfd src/iso2dfd.cpp) + +add_custom_target (run + COMMAND iso2dfd 1000 1000 2000 + WORKING_DIRECTORY ${CMAKE_PROJECT_DIR} +) + diff --git a/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/License.txt b/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/License.txt new file mode 100644 index 0000000000..6e9524bd74 --- /dev/null +++ b/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/License.txt @@ -0,0 +1,7 @@ +Copyright 2020 Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the 
Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/README.md b/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/README.md new file mode 100644 index 0000000000..604dd14b56 --- /dev/null +++ b/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/README.md @@ -0,0 +1,145 @@ +# ISO2DFD sample + +ISO2DFD: Intel® oneAPI DPC++ Language Basics Using +2D-Finite-Difference-Wave Propagation + +The ISO2DFD sample refers to Two-Dimensional Finite-Difference Wave Propagation in Isotropic Media. It is a two-dimensional stencil to simulate a wave propagating in a 2D isotropic medium and illustrates the basics of the DPC++ programming language using direct programming. + +A complete code walk-through for this sample can be found at: +https://software.intel.com/en-us/articles/code-sample-two-dimensional-finite-difference-wave-propagation-in-isotropic-media-iso2dfd + +For comprehensive instructions regarding DPC++ Programming, go to +https://software.intel.com/en-us/oneapi-programming-guide +and search based on relevant terms noted in the comments. + + +| Optimized for | Description +|:--- |:--- +| OS | Linux Ubuntu 18.04 +| Hardware | Skylake with GEN9 or newer +| Software | Intel® oneAPI DPC++ Compiler (beta); Intel C++ Compiler (beta) +| What you will learn | How to offload the computation to GPU using Intel DPC++ compiler +| Time to complete | 10 minutes + + +## Purpose + +ISO2DFD is a finite difference stencil kernel for solving the 2D acoustic isotropic wave equation. 
In +this sample, we chose the problem of solving a Partial Differential Equation (PDE), using a +finite-difference method, to illustrate the essential elements of the DPC++ programming language: +queues, buffers/accessors, and kernels. Use it as an entry point to start programming in DPC++ or as a +proxy to develop or better understand complicated code for similar problems. + +Using Data Parallel C++, the sample will explicitly run on the GPU as well as the CPU to calculate a +result. The output will include the GPU device name. The results from the two devices are compared and, if +the sample ran correctly, a success message is reported. The output of the wavefield can be plotted using +the SU Seismic processing library, which has utilities to display seismic wavefields and can be +downloaded from John Stockwell’s SeisUnix GitHub*: +https://github.com/JohnWStockwellJr/SeisUnix/wiki/Seismic-Unix-install-on-Ubuntu + + +## Key implementation details + +SYCL implementation explained. + +* DPC++ queues (including device selectors and exception handlers). +* DPC++ buffers and accessors. +* The ability to call a function inside a kernel definition and pass accessor arguments as pointers. A +function called inside the kernel performs a computation (it updates a grid point specified by the +global ID variable) for a single time step. + + +## License + +This code sample is licensed under the MIT license. + + +## Building the `iso2dfd` Program for CPU and GPU + +### Running Samples In DevCloud + +If running a sample in the Intel DevCloud, remember that you must specify the compute node (CPU, GPU, +FPGA) as well as whether to run in batch or interactive mode. For more information, see the Intel® oneAPI +Base Toolkit Get Started Guide (https://devcloud.intel.com/oneapi/get-started/base-toolkit/) + +### On a Linux* System +Perform the following steps: +1. Build the program using the following `cmake` commands. + + ``` + cd iso2dfd_dpcpp && + mkdir build && + cd build && + cmake .. 
&& + make -j + ``` + +2. Run the program on Gen9 + + ``` + make run + ``` + +3. Clean the program + + ``` + make clean + ``` + +### On a Windows* System Using Visual Studio* Version 2017 or Newer +* Build the program using VS2017 or VS2019 + Right click on the solution file and open using either VS2017 or VS2019 IDE. + Right click on the project in Solution explorer and select Rebuild. + From top menu select Debug -> Start without Debugging. + +>If you see the following error message when compiling this sample: +> +``` +Error 'dpc_common.hpp' file not found +``` +>You need to add the following directory to the list of include folders, that are required by your project, in your project's Visual Studio project property panel. The missing include folder is located at `%ONEAPI_ROOT%\dev-utilities\latest\include` on your development system. + +* Build the program using MSBuild + Open "x64 Native Tools Command Prompt for VS2017" or "x64 Native Tools Command Prompt for VS2019" + Run - MSBuild iso2dfd.sln /t:Rebuild /p:Configuration="Release" + + +## Running the Sample +### Application Parameters + +You can execute the code with different parameters. For example the following command will run the iso2dfd executable using a 1000x1000 grid size and it will iterate over 2000 time steps. + + ``` + ./iso2dfd 1000 1000 2000 + ``` + + Usage: ./iso2dfd n1 n2 Iterations + + n1 n2 : Grid sizes for the stencil + Iterations : Number of timesteps. + + * Find graphical output for sample execution in the online tutorial at: + https://software.intel.com/en-us/articles/code-sample-two-dimensional-finite-difference-wave-propagation-in-isotropic-media-iso2dfd + +### Example of Output + + ``` + Initializing ... + Grid Sizes: 1000 1000 + Iterations: 2000 + + Computing wavefield in device .. + Running on Intel(R) Gen9 HD Graphics NEO + The Device Max Work Group Size is : 256 + The Device Max EUCount is : 24 + SYCL time: 3282 ms + + Computing wavefield in CPU .. + Initializing ... 
+ CPU time: 8846 ms + + Final wavefields from device and CPU are equivalent: Success + Final wavefields (from device and CPU) written to disk + Finished. + [100%] Built target run + ``` diff --git a/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/iso2dfd.sln b/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/iso2dfd.sln new file mode 100644 index 0000000000..174faa6896 --- /dev/null +++ b/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/iso2dfd.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.28307.960 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "iso2dfd", "iso2dfd.vcxproj", "{1AE3DD06-C3F0-4746-B126-EEB6A94CF35C}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {1AE3DD06-C3F0-4746-B126-EEB6A94CF35C}.Debug|x64.ActiveCfg = Debug|x64 + {1AE3DD06-C3F0-4746-B126-EEB6A94CF35C}.Debug|x64.Build.0 = Debug|x64 + {1AE3DD06-C3F0-4746-B126-EEB6A94CF35C}.Release|x64.ActiveCfg = Release|x64 + {1AE3DD06-C3F0-4746-B126-EEB6A94CF35C}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {AC4985B6-FFDE-4420-B533-7D4318863288} + EndGlobalSection +EndGlobal diff --git a/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/iso2dfd.vcxproj b/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/iso2dfd.vcxproj new file mode 100644 index 0000000000..7d258692a9 --- /dev/null +++ b/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/iso2dfd.vcxproj @@ -0,0 +1,151 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + + + 15.0 + {1ae3dd06-c3f0-4746-b126-eeb6a94cf35c} + Win32Proj + iso2dfd + 
$(WindowsSDKVersion.Replace("\","")) + + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + + + + + + + + + + + + + + + + + + + true + + + true + + + false + + + false + + + + Use + Level3 + Disabled + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + Disabled + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + + + + + + diff --git a/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/iso2dfd.vcxproj.filters b/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/iso2dfd.vcxproj.filters new file mode 100644 index 0000000000..1b7c40576f --- /dev/null +++ b/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/iso2dfd.vcxproj.filters @@ -0,0 +1,22 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;ipp;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + \ No newline at end of file diff --git a/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/iso2dfd.vcxproj.user b/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/iso2dfd.vcxproj.user new file mode 100644 index 0000000000..f5d6e260eb --- /dev/null +++ b/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/iso2dfd.vcxproj.user @@ -0,0 +1,11 @@ + + + + 
1000 1000 2000 + WindowsLocalDebugger + + + 1000 1000 2000 + WindowsLocalDebugger + + \ No newline at end of file diff --git a/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/sample.json b/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/sample.json new file mode 100644 index 0000000000..f97a3bd596 --- /dev/null +++ b/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/sample.json @@ -0,0 +1,30 @@ +{ + "guid": "9483C0F0-7D63-4E99-86C5-37C40F77B2AE" , + "name": "iso2dfd_dpcpp", + "categories": [ "Toolkit/Intel® oneAPI HPC Toolkit" ], + "description": "ISO2DFD: Intel® oneAPI DPC++ Language Basics Using 2D Finite-Difference-Wave Propagation", + "toolchain": [ "dpcpp" ], + "targetDevice": [ "CPU", "GPU" ], + "languages": [ { "cpp": {} } ], + "os": [ "linux", "windows"], + "builder": [ "ide", "cmake" ], + "ciTests": { + "linux": [{ + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make", + "make run" + ] + }], + "windows": [{ + "steps": [ + "MSBuild iso2dfd.sln /t:Rebuild /p:Configuration=\"Release\"", + "cd x64/Release", + "iso2dfd.exe 1000 1000 2000" + ] + }] + + } +} diff --git a/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/src/iso2dfd.cpp b/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/src/iso2dfd.cpp new file mode 100644 index 0000000000..e4638ba703 --- /dev/null +++ b/DirectProgramming/DPC++/StructuredGrids/iso2dfd_dpcpp/src/iso2dfd.cpp @@ -0,0 +1,380 @@ +//============================================================== +// Copyright © 2019 Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= + +// ISO2DFD: Intel® oneAPI DPC++ Language Basics Using 2D-Finite-Difference-Wave +// Propagation +// +// ISO2DFD is a finite difference stencil kernel for solving the 2D acoustic +// isotropic wave equation. Kernels in this sample are implemented as 2nd order +// in space, 2nd order in time scheme without boundary conditions. 
Using Data +// Parallel C++, the sample will explicitly run on the GPU as well as CPU to +// calculate a result. If successful, the output will include GPU device name. +// +// A complete online tutorial for this code sample can be found at : +// https://software.intel.com/en-us/articles/code-sample-two-dimensional-finite-difference-wave-propagation-in-isotropic-media-iso2dfd +// +// For comprehensive instructions regarding DPC++ Programming, go to +// https://software.intel.com/en-us/oneapi-programming-guide +// and search based on relevant terms noted in the comments. +// +// DPC++ material used in this code sample: +// +// Basic structures of DPC++: +// DPC++ Queues (including device selectors and exception handlers) +// DPC++ Buffers and accessors (communicate data between the host and the device) +// DPC++ Kernels (including parallel_for function and range<2> objects) +// + +#include +#include +#include +#include +#include +#include +#include + +#include "dpc_common.hpp" + +using namespace cl::sycl; +using namespace std; + +/* + * Parameters to define coefficients + * half_length: Radius of the stencil + * Sample source code is tested for half_length=1 resulting in + * 2nd order Stencil finite difference kernel + */ + +constexpr float DT = 0.002f; +constexpr float DXY = 20.0f; +constexpr unsigned int half_length = 1; + +/* + * Host-Code + * Utility function to display input arguments + */ +void Usage(const string &program_name) { + cout << " Incorrect parameters\n"; + cout << " Usage: "; + cout << program_name << " n1 n2 Iterations\n\n"; + cout << " n1 n2 : Grid sizes for the stencil\n"; + cout << " Iterations : No. 
of timesteps.\n"; +} + +/* + * Host-Code + * Function used for initialization + */ +void Initialize(float* ptr_prev, float* ptr_next, float* ptr_vel, size_t n_rows, + size_t n_cols) { + cout << "Initializing ...\n"; + + // Define source wavelet + float wavelet[12] = {0.016387336, -0.041464937, -0.067372555, 0.386110067, + 0.812723635, 0.416998396, 0.076488599, -0.059434419, + 0.023680172, 0.005611435, 0.001823209, -0.000720549}; + + // Initialize arrays + for (size_t i = 0; i < n_rows; i++) { + size_t offset = i * n_cols; + + for (int k = 0; k < n_cols; k++) { + ptr_prev[offset + k] = 0.0f; + ptr_next[offset + k] = 0.0f; + // pre-compute squared value of sample wave velocity v*v (v = 1500 m/s) + ptr_vel[offset + k] = (1500.0f * 1500.0f); + } + } + // Add a source to initial wavefield as an initial condition + for (int s = 11; s >= 0; s--) { + for (int i = n_rows / 2 - s; i < n_rows / 2 + s; i++) { + size_t offset = i * n_cols; + for (int k = n_cols / 2 - s; k < n_cols / 2 + s; k++) { + ptr_prev[offset + k] = wavelet[s]; + } + } + } +} + +/* + * Host-Code + * Utility function to print device info + */ +void PrintTargetInfo(queue& q) { + auto device = q.get_device(); + auto max_block_size = + device.get_info(); + + auto max_EU_count = + device.get_info(); + + cout<< " Running on " << device.get_info()<<"\n"; + cout<< " The Device Max Work Group Size is : "<< max_block_size<<"\n"; + cout<< " The Device Max EUCount is : " << max_EU_count<<"\n"; +} + +/* + * Host-Code + * Utility function to calculate L2-norm between resulting buffer and reference + * buffer + */ +bool WithinEpsilon(float* output, float* reference, const size_t dim_x, + const size_t dim_y, const unsigned int radius, + const float delta = 0.01f) { + ofstream err_file; + err_file.open("error_diff.txt"); + + bool error = false; + double norm2 = 0; + + for (size_t iy = 0; iy < dim_y; iy++) { + for (size_t ix = 0; ix < dim_x; ix++) { + if (ix >= radius && ix < (dim_x - radius) && iy >= radius && + iy < 
(dim_y - radius)) { + float difference = fabsf(*reference - *output); + norm2 += difference * difference; + if (difference > delta) { + error = true; + err_file<<" ERROR: "< it, float* next, float* prev, + float* vel, const float dtDIVdxy, int n_rows, + int n_cols) { + float value = 0.0; + + // Compute global id + // We can use the get.global.id() function of the item variable + // to compute global id. The 2D array is laid out in memory in row major + // order. + size_t gid_row = it.get(0); + size_t gid_col = it.get(1); + size_t gid = (gid_row)*n_cols + gid_col; + + // Computation to solve wave equation in 2D + // First check if gid is inside the effective grid (not in halo) + if ((gid_col >= half_length && gid_col < n_cols - half_length) && + (gid_row >= half_length && gid_row < n_rows - half_length)) { + // Stencil code to update grid point at position given by global id (gid) + // New time step for grid point is computed based on the values of the + // the immediate neighbors in both the horizontal and vertical + // directions, as well as the value of grid point at a previous time step + value = 0.0; + value += prev[gid + 1] - 2.0 * prev[gid] + prev[gid - 1]; + value += prev[gid + n_cols] - 2.0 * prev[gid] + prev[gid - n_cols]; + value *= dtDIVdxy * vel[gid]; + next[gid] = 2.0f * prev[gid] - next[gid] + value; + } +} + +int main(int argc, char* argv[]) { + // Arrays used to update the wavefield + float* prev_base; + float* next_base; + float* next_cpu; + // Array to store wave velocity + float* vel_base; + + bool error = false; + + size_t n_rows, n_cols; + unsigned int n_iterations; + + // Read parameters + try { + n_rows = stoi(argv[1]); + n_cols = stoi(argv[2]); + n_iterations = stoi(argv[3]); + } + + catch (...) 
{ + Usage(argv[0]); + return 1; + } + + // Compute the total size of grid + size_t n_size = n_rows * n_cols; + + // Allocate arrays to hold wavefield and velocity + prev_base = new float[n_size]; + next_base = new float[n_size]; + next_cpu = new float[n_size]; + vel_base = new float[n_size]; + + // Compute constant value (delta t)^2 (delta x)^2. To be used in wavefield + // update + float dtDIVdxy = (DT * DT) / (DXY * DXY); + + // Initialize arrays and introduce initial conditions (source) + Initialize(prev_base, next_base, vel_base, n_rows, n_cols); + + cout << "Grid Sizes: " << n_rows << " " << n_cols << "\n"; + cout << "Iterations: " << n_iterations << "\n\n"; + + // Define device selector as 'default' + default_selector device_selector; + + // Create a device queue using DPC++ class queue + queue q(device_selector, dpc_common::exception_handler); + + cout << "Computing wavefield in device ..\n"; + // Display info about device + PrintTargetInfo(q); + + // Start timer + dpc_common::TimeInterval t_offload; + + { // Begin buffer scope + // Create buffers using DPC++ class buffer + buffer b_next(next_base, range(n_size)); + buffer b_prev(prev_base, range(n_size)); + buffer b_vel(vel_base, range(n_size)); + + // Iterate over time steps + for (unsigned int k = 0; k < n_iterations; k += 1) { + // Submit command group for execution + q.submit([&](auto &h) { + // Create accessors + auto next = b_next.get_access(h); + auto prev = b_prev.get_access(h); + auto vel = b_vel.get_access(h); + + // Define local and global range + auto global_range = range<2>(n_rows, n_cols); + + // Send a DPC++ kernel (lambda) for parallel execution + // The function that executes a single iteration is called + // "iso_2dfd_iteration_global" + // alternating the 'next' and 'prev' parameters which effectively + // swaps their content at every iteration. 
+ if (k % 2 == 0) + h.parallel_for(global_range, [=](id<2> it) { + Iso2dfdIterationGlobal(it, next.get_pointer(), + prev.get_pointer(), vel.get_pointer(), + dtDIVdxy, n_rows, n_cols); + }); + else + h.parallel_for(global_range, [=](id<2> it) { + Iso2dfdIterationGlobal(it, prev.get_pointer(), + next.get_pointer(), vel.get_pointer(), + dtDIVdxy, n_rows, n_cols); + }); + }); + + } // end for + + } // buffer scope + + // Wait for commands to complete. Enforce synchronization on the command queue + q.wait_and_throw(); + + // Compute and display time used by device + auto time = t_offload.Elapsed(); + + cout << "Offload time: " << time << " ms\n\n"; + + // Output final wavefield (computed by device) to binary file + ofstream out_file; + out_file.open("wavefield_snapshot.bin", ios::out | ios::binary); + out_file.write(reinterpret_cast(next_base), n_size * sizeof(float)); + out_file.close(); + + // Compute wavefield on CPU (for validation) + + cout << "Computing wavefield in CPU ..\n"; + // Re-initialize arrays + Initialize(prev_base, next_cpu, vel_base, n_rows, n_cols); + + // Compute wavefield on CPU + // Start timer for CPU + dpc_common::TimeInterval t_cpu; + + Iso2dfdIterationCpu(next_cpu, prev_base, vel_base, dtDIVdxy, n_rows, n_cols, + n_iterations); + + // Compute and display time used by CPU + time = t_cpu.Elapsed(); + + cout << "CPU time: " << time << " ms\n\n"; + + // Compute error (difference between final wavefields computed in device and + // CPU) + error = WithinEpsilon(next_base, next_cpu, n_rows, n_cols, half_length, 0.1f); + + // If error greater than threshold (last parameter in error function), report + if (error) + cout << "Final wavefields from device and CPU are different: Error\n"; + else + cout << "Final wavefields from device and CPU are equivalent: Success\n"; + + // Output final wavefield (computed by CPU) to binary file + out_file.open("wavefield_snapshot_cpu.bin", ios::out | ios::binary); + out_file.write(reinterpret_cast(next_cpu), n_size * 
sizeof(float)); + out_file.close(); + + cout << "Final wavefields (from device and CPU) written to disk\n"; + cout << "Finished.\n"; + + // Cleanup + delete[] prev_base; + delete[] next_base; + delete[] vel_base; + + return error ? 1 : 0; +}