From 7482e569b67e65d3fee74f257be16b8ffe5dbf50 Mon Sep 17 00:00:00 2001 From: "kevin.p.oleary" Date: Fri, 21 Aug 2020 07:38:45 -0700 Subject: [PATCH 01/23] Adding Advisor sample Signed-off-by: kevin.p.oleary --- Tools/Advisor/README.md | 9 + .../Advisor/matrix_multiply_advisor/.gitkeep | 0 .../Advisor_matrix_multiply_advisor_README.md | 54 +++++ .../matrix_multiply_advisor/CMakeLists.txt | 6 + .../matrix_multiply_advisor/License.txt | 7 + .../Advisor/matrix_multiply_advisor/README.md | 54 +++++ .../matrix_multiply.sln | 25 ++ .../matrix_multiply.vcxproj | 225 ++++++++++++++++++ .../matrix_multiply.vcxproj.filters | 36 +++ .../matrix_multiply.vcxproj.user | 4 + .../matrix_multiply_advisor/sample.json | 44 ++++ .../matrix_multiply_advisor/src/matrix.cpp | 102 ++++++++ .../matrix_multiply_advisor/src/multiply.cpp | 168 +++++++++++++ .../matrix_multiply_advisor/src/multiply.hpp | 48 ++++ 14 files changed, 782 insertions(+) create mode 100644 Tools/Advisor/README.md create mode 100644 Tools/Advisor/matrix_multiply_advisor/.gitkeep create mode 100755 Tools/Advisor/matrix_multiply_advisor/Advisor_matrix_multiply_advisor_README.md create mode 100644 Tools/Advisor/matrix_multiply_advisor/CMakeLists.txt create mode 100755 Tools/Advisor/matrix_multiply_advisor/License.txt create mode 100644 Tools/Advisor/matrix_multiply_advisor/README.md create mode 100644 Tools/Advisor/matrix_multiply_advisor/matrix_multiply.sln create mode 100644 Tools/Advisor/matrix_multiply_advisor/matrix_multiply.vcxproj create mode 100644 Tools/Advisor/matrix_multiply_advisor/matrix_multiply.vcxproj.filters create mode 100644 Tools/Advisor/matrix_multiply_advisor/matrix_multiply.vcxproj.user create mode 100644 Tools/Advisor/matrix_multiply_advisor/sample.json create mode 100644 Tools/Advisor/matrix_multiply_advisor/src/matrix.cpp create mode 100644 Tools/Advisor/matrix_multiply_advisor/src/multiply.cpp create mode 100644 Tools/Advisor/matrix_multiply_advisor/src/multiply.hpp diff --git 
a/Tools/Advisor/README.md b/Tools/Advisor/README.md new file mode 100644 index 0000000000..7ad54ab865 --- /dev/null +++ b/Tools/Advisor/README.md @@ -0,0 +1,9 @@ +# Code Samples of Intel(R) Advisor + +| Code sample name | Supported Intel(r) Architecture(s) | Description +|:--- |:--- |:--- +| matrix_multiply | GPU, CPU | Simple matrix multiplication program + + +## License +The code samples are licensed under MIT license \ No newline at end of file diff --git a/Tools/Advisor/matrix_multiply_advisor/.gitkeep b/Tools/Advisor/matrix_multiply_advisor/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/Tools/Advisor/matrix_multiply_advisor/Advisor_matrix_multiply_advisor_README.md b/Tools/Advisor/matrix_multiply_advisor/Advisor_matrix_multiply_advisor_README.md new file mode 100755 index 0000000000..4bcdbcb0bb --- /dev/null +++ b/Tools/Advisor/matrix_multiply_advisor/Advisor_matrix_multiply_advisor_README.md @@ -0,0 +1,54 @@ +# matrix multiply sample +A sample containing multiple implementations of matrix multiplication. This sample code is implemented using C++ and SYCL language for CPU and GPU. 
+ +| Optimized for | Description +|:--- |:--- +| OS | Linux Ubuntu 18.04; Windows 10 +| Hardware | Kaby Lake with GEN9 or newer +| Software | Intel(R) oneAPI DPC++ Compiler (beta); Intel(R) Advisor +| What you will learn | How to profile an application using Intel(R) Advisor +| Time to complete | 15 minutes + + + +## License +This code sample is licensed under the MIT license + +## How to Build + +This sample contains 3 versions of matrix multiplication using DPC++: + + multiply1 – basic implementation of matrix multiply using DPC++ + multiply1_1 – basic implementation that replaces the buffer store with a local variable “acc” to reduce memory traffic + multiply1_2 – basic implementation plus the local variable and matrix tiling + +Edit the line in multiply.hpp to select the version of the multiply function: +#define MULTIPLY multiply1 + + +### on Linux + To build DPC++ version: + cd + cmake . + make + + Clean the program + make clean + +### on Windows - Visual Studio 2017 or newer + * Open Visual Studio 2017 + * Select Menu "File > Open > Project/Solution", find "matrix_multiply" folder and select "matrix_multiply.sln" + * Select Menu "Project > Build" to build the selected configuration + * Select Menu "Debug > Start Without Debugging" to run the program + +### on Windows - command line - Build the program using MSBuild + DPCPP Configurations: + Release - MSBuild matrix_multiply.sln /t:Rebuild /p:Configuration="Release" + Debug - MSBuild matrix_multiply.sln /t:Rebuild /p:Configuration="Debug" + + +## Running an Intel Advisor analysis +------------------------------------------ + +See the Advisor Cookbook here: https://software.intel.com/en-us/advisor-cookbook + diff --git a/Tools/Advisor/matrix_multiply_advisor/CMakeLists.txt b/Tools/Advisor/matrix_multiply_advisor/CMakeLists.txt new file mode 100644 index 0000000000..77f6fd607d --- /dev/null +++ b/Tools/Advisor/matrix_multiply_advisor/CMakeLists.txt @@ -0,0 +1,6 @@ +set(CMAKE_CXX_COMPILER dpcpp) 
+cmake_minimum_required(VERSION 3.0) +project(matrix_multiply) +set(CMAKE_CXX_FLAGS "-g -O3 -fsycl -Wno-write-strings -w -D_Linux") +add_executable(matrix.dpcpp src/matrix.cpp src/multiply.cpp) +add_custom_target(run ./matrix.dpcpp) diff --git a/Tools/Advisor/matrix_multiply_advisor/License.txt b/Tools/Advisor/matrix_multiply_advisor/License.txt new file mode 100755 index 0000000000..da5f7c1888 --- /dev/null +++ b/Tools/Advisor/matrix_multiply_advisor/License.txt @@ -0,0 +1,7 @@ +Copyright 2019 Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/Tools/Advisor/matrix_multiply_advisor/README.md b/Tools/Advisor/matrix_multiply_advisor/README.md new file mode 100644 index 0000000000..4bcdbcb0bb --- /dev/null +++ b/Tools/Advisor/matrix_multiply_advisor/README.md @@ -0,0 +1,54 @@ +# matrix multiply sample +A sample containing multiple implementations of matrix multiplication. This sample code is implemented using C++ and SYCL language for CPU and GPU. 
+ +| Optimized for | Description +|:--- |:--- +| OS | Linux Ubuntu 18.04; Windows 10 +| Hardware | Kaby Lake with GEN9 or newer +| Software | Intel(R) oneAPI DPC++ Compiler (beta); Intel(R) Advisor +| What you will learn | How to profile an application using Intel(R) Advisor +| Time to complete | 15 minutes + + + +## License +This code sample is licensed under the MIT license + +## How to Build + +This sample contains 3 versions of matrix multiplication using DPC++: + + multiply1 – basic implementation of matrix multiply using DPC++ + multiply1_1 – basic implementation that replaces the buffer store with a local variable “acc” to reduce memory traffic + multiply1_2 – basic implementation plus the local variable and matrix tiling + +Edit the line in multiply.hpp to select the version of the multiply function: +#define MULTIPLY multiply1 + + +### on Linux + To build DPC++ version: + cd + cmake . + make + + Clean the program + make clean + +### on Windows - Visual Studio 2017 or newer + * Open Visual Studio 2017 + * Select Menu "File > Open > Project/Solution", find "matrix_multiply" folder and select "matrix_multiply.sln" + * Select Menu "Project > Build" to build the selected configuration + * Select Menu "Debug > Start Without Debugging" to run the program + +### on Windows - command line - Build the program using MSBuild + DPCPP Configurations: + Release - MSBuild matrix_multiply.sln /t:Rebuild /p:Configuration="Release" + Debug - MSBuild matrix_multiply.sln /t:Rebuild /p:Configuration="Debug" + + +## Running an Intel Advisor analysis +------------------------------------------ + +See the Advisor Cookbook here: https://software.intel.com/en-us/advisor-cookbook + diff --git a/Tools/Advisor/matrix_multiply_advisor/matrix_multiply.sln b/Tools/Advisor/matrix_multiply_advisor/matrix_multiply.sln new file mode 100644 index 0000000000..51a22a5e49 --- /dev/null +++ b/Tools/Advisor/matrix_multiply_advisor/matrix_multiply.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution 
File, Format Version 12.00 +# Visual Studio Version 16 +VisualStudioVersion = 16.0.29209.62 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "matrix_multiply", "matrix_multiply.vcxproj", "{D209315E-99B1-47B6-9E4B-C922C022BE8C}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {D209315E-99B1-47B6-9E4B-C922C022BE8C}.Debug|x64.ActiveCfg = Debug|x64 + {D209315E-99B1-47B6-9E4B-C922C022BE8C}.Debug|x64.Build.0 = Debug|x64 + {D209315E-99B1-47B6-9E4B-C922C022BE8C}.Release|x64.ActiveCfg = Release|x64 + {D209315E-99B1-47B6-9E4B-C922C022BE8C}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {6553E447-FADC-4836-9E68-26B86B68512B} + EndGlobalSection +EndGlobal diff --git a/Tools/Advisor/matrix_multiply_advisor/matrix_multiply.vcxproj b/Tools/Advisor/matrix_multiply_advisor/matrix_multiply.vcxproj new file mode 100644 index 0000000000..225398a593 --- /dev/null +++ b/Tools/Advisor/matrix_multiply_advisor/matrix_multiply.vcxproj @@ -0,0 +1,225 @@ + + + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {d209315e-99b1-47b6-9e4b-c922c022be8c} + Win32Proj + matrix_multiply + $(WindowsSDKVersion.Replace("\","")) + + + + Application + true + oneAPI Data Parallel C++ Compiler + Unicode + + + Application + true + oneAPI Data Parallel C++ Compiler + Unicode + + + Application + false + oneAPI Data Parallel C++ Compiler + true + Unicode + + + Application + false + oneAPI Data Parallel C++ Compiler + true + Unicode + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + + + + + + + + + + + + + + + + + + + + + + + + + true 
+ + + true + + + true + + + false + + + false + + + false + + + + Use + Level3 + Disabled + true + true + pch.h + + + Console + true + + + + + Use + Level3 + Disabled + true + true + pch.h + + + Console + true + + + + + Use + Level3 + Disabled + true + true + pch.h + false + WIN32;DPCPP;%(PreprocessorDefinitions) + DisableAllWarnings + /std:c++17 %(AdditionalOptions) + + + Console + true + false + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + + + Console + true + true + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + + + Console + true + true + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + false + _UNICODE;UNICODE;%(PreprocessorDefinitions) + DisableAllWarnings + Default + %(AdditionalOptions) + + + Console + true + true + true + false + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/Tools/Advisor/matrix_multiply_advisor/matrix_multiply.vcxproj.filters b/Tools/Advisor/matrix_multiply_advisor/matrix_multiply.vcxproj.filters new file mode 100644 index 0000000000..12c6e07af1 --- /dev/null +++ b/Tools/Advisor/matrix_multiply_advisor/matrix_multiply.vcxproj.filters @@ -0,0 +1,36 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;ipp;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + Source Files + + + + + + + + + + + Header Files + + + \ No newline at end of file diff --git a/Tools/Advisor/matrix_multiply_advisor/matrix_multiply.vcxproj.user b/Tools/Advisor/matrix_multiply_advisor/matrix_multiply.vcxproj.user new file mode 100644 index 0000000000..be25078707 --- /dev/null +++ b/Tools/Advisor/matrix_multiply_advisor/matrix_multiply.vcxproj.user @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git 
a/Tools/Advisor/matrix_multiply_advisor/sample.json b/Tools/Advisor/matrix_multiply_advisor/sample.json new file mode 100644 index 0000000000..7777543652 --- /dev/null +++ b/Tools/Advisor/matrix_multiply_advisor/sample.json @@ -0,0 +1,44 @@ +{ +"guid":"6F00053A-11DC-40D1-90C7-1CFF822B874B", + "name": "Matrix Multiplication - Intel Advisor", + "categories": ["Toolkit/Intel® oneAPI Base Toolkit/Advisor"], + "description": "Simple program that shows how to improve the DPC++ Matrix Multiplication program using VTune Profiler and Advisor. ", + "toolchain": ["dpcpp"], + "dependencies": ["advisor"], + "languages": [{"cpp": { }}], + "os": ["linux", "windows"], + "targetDevice": ["CPU", "GPU"], + "builder": ["ide", "cmake"] +} +{ + "path": "Advisor/matrix_multiply_advisor", + "configurations": [ + { + "build": [ + "cmake .", + "make" + ], + "run": [ + "./matrix.dpcpp" + ], + "clean": [ + "make clean" + ] + } + ] + }, +{ + "path": "Advisor/matrix_multiply_advisor", + "configurations": [ + { + "build": [ + "MSBuild matrix_multiply.sln /t:Rebuild /p:Configuration=\"Release\"" + ], + "run": [ + "cd x64\\Release", + "matrix_multiply.exe" + ] + } + ] + } + diff --git a/Tools/Advisor/matrix_multiply_advisor/src/matrix.cpp b/Tools/Advisor/matrix_multiply_advisor/src/matrix.cpp new file mode 100644 index 0000000000..52e27f3422 --- /dev/null +++ b/Tools/Advisor/matrix_multiply_advisor/src/matrix.cpp @@ -0,0 +1,102 @@ +//============================================================== +// Copyright 2019 Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= + +#include +#include +#include + +#include "multiply.hpp" + +typedef unsigned long long UINT64; +#define xstr(s) x_str(s) +#define x_str(s) #s + +using namespace std; + +// routine to initialize an array with data +void InitArr(TYPE row, TYPE col, TYPE off, TYPE a[][NUM]) { + int i, j; + + for (i = 0; i < NUM; i++) { + for (j = 0; j < NUM; j++) { + a[i][j] = row * i 
+ col * j + off; + } + } +} + +// routine to print out contents of small arrays +void PrintArr(char *name, TYPE Array[][NUM]) { + int i, j; + + cout << "\n"< elapsed_seconds = end-start; + cout << "Elapsed Time: " << elapsed_seconds.count() << "s\n"; + + // free memory + free(buf1); + free(buf2); + free(buf3); + free(buf4); + +} diff --git a/Tools/Advisor/matrix_multiply_advisor/src/multiply.cpp b/Tools/Advisor/matrix_multiply_advisor/src/multiply.cpp new file mode 100644 index 0000000000..b00f6eaa09 --- /dev/null +++ b/Tools/Advisor/matrix_multiply_advisor/src/multiply.cpp @@ -0,0 +1,168 @@ +//============================================================== +// Copyright 2019 Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= + +// matrix multiply routines +#include "multiply.hpp" + +#include +#include + +using namespace cl::sycl; +using namespace std; + +constexpr cl::sycl::access::mode sycl_read = cl::sycl::access::mode::read; +constexpr cl::sycl::access::mode sycl_write = cl::sycl::access::mode::write; +constexpr cl::sycl::access::mode sycl_read_write = cl::sycl::access::mode::read_write; + +template +class Matrix1; + +template +class Matrix1_1; + +template +class Matrix1_2; + +// Basic matrix multiply +void multiply1(int msize, int tidx, int numt, TYPE a[][NUM], TYPE b[][NUM], + TYPE c[][NUM], TYPE t[][NUM]) { + int i, j, k; + + // Declare a deviceQueue + default_selector device; + queue q(device, exception_handler); + cout << "Running on " << q.get_device().get_info() << "\n"; + // Declare a 2 dimensional range + range<2> matrix_range{NUM, NUM}; + + // Declare 3 buffers and Initialize them + buffer bufferA((TYPE*)a, matrix_range); + buffer bufferB((TYPE*)b, matrix_range); + buffer bufferC((TYPE*)c, matrix_range); + // Submit our job to the queue + q.submit([&](cl::sycl::handler& h) { + // Declare 3 accessors to our buffers. 
The first 2 read and the last + // read_write + auto accessorA = bufferA.get_access(h); + auto accessorB = bufferB.get_access(h); + auto accessorC = bufferC.get_access(h); + + // Execute matrix multiply in parallel over our matrix_range + // ind is an index into this range + h.parallel_for >(matrix_range,[=](cl::sycl::id<2> ind) { + int k; + for (k = 0; k < NUM; k++) { + // Perform computation ind[0] is row, ind[1] is col + accessorC[ind[0]][ind[1]] += accessorA[ind[0]][k] * accessorB[k][ind[1]]; + } + }); + }).wait_and_throw(); +} + +// Replaces accessorC reference with a local variable +void multiply1_1(int msize, int tidx, int numt, TYPE a[][NUM], TYPE b[][NUM],TYPE c[][NUM], TYPE t[][NUM]) { + + int i, j, k; + + // Declare a deviceQueue + default_selector device; + queue q(device, exception_handler); + cout << "Running on " << q.get_device().get_info() << "\n"; + + // Declare a 2 dimensional range + range<2> matrix_range{NUM, NUM}; + + // Declare 3 buffers and Initialize them + buffer bufferA((TYPE*)a, matrix_range); + buffer bufferB((TYPE*)b, matrix_range); + buffer bufferC((TYPE*)c, matrix_range); + + // Submit our job to the queue + q.submit([&](cl::sycl::handler& h) { + // Declare 3 accessors to our buffers. 
The first 2 read and the last + // read_write + auto accessorA = bufferA.get_access(h); + auto accessorB = bufferB.get_access(h); + auto accessorC = bufferC.get_access(h); + + // Execute matrix multiply in parallel over our matrix_range + // ind is an index into this range + h.parallel_for>(matrix_range,[=](cl::sycl::id<2> ind) { + int k; + TYPE acc = 0.0; + for (k = 0; k < NUM; k++) { + // Perform computation ind[0] is row, ind[1] is col + acc += accessorA[ind[0]][k] * accessorB[k][ind[1]]; + } + accessorC[ind[0]][ind[1]] = acc; + }); + }).wait_and_throw(); +} + +// Replaces accessorC reference with a local variable and adds matrix tiling +void multiply1_2(int msize, int tidx, int numt, TYPE a[][NUM], TYPE b[][NUM], + TYPE c[][NUM], TYPE t[][NUM]) { + int i, j, k; + + // Declare a deviceQueue + default_selector device; + queue q(device, exception_handler); + cout << "Running on " << q.get_device().get_info() << "\n"; + + // Declare a 2 dimensional range + range<2> matrix_range{NUM, NUM}; + range<2> tile_range{MATRIXTILESIZE, MATRIXTILESIZE}; + + // Declare 3 buffers and Initialize them + buffer bufferA((TYPE*)a, matrix_range); + buffer bufferB((TYPE*)b, matrix_range); + buffer bufferC((TYPE*)c, matrix_range); + + // Submit our job to the queue + q.submit([&](cl::sycl::handler& h) { + // Declare 3 accessors to our buffers. 
The first 2 read and the last + // read_write + auto accessorA = bufferA.get_access(h); + auto accessorB = bufferB.get_access(h); + auto accessorC = bufferC.get_access(h); + + // Create matrix tiles + accessor aTile(cl::sycl::range<2>(MATRIXTILESIZE, MATRIXTILESIZE), h); + accessor bTile(cl::sycl::range<2>(MATRIXTILESIZE, MATRIXTILESIZE), h); + // Execute matrix multiply in parallel over our matrix_range + // ind is an index into this range + h.parallel_for>(cl::sycl::nd_range<2>(matrix_range,tile_range),[=](cl::sycl::nd_item<2> it) { + int k; + const int numTiles = NUM / MATRIXTILESIZE; + const int row = it.get_local_id(0); + const int col = it.get_local_id(1); + const int globalRow = MATRIXTILESIZE * it.get_group(0) + row; + const int globalCol = MATRIXTILESIZE * it.get_group(1) + col; + TYPE acc = 0.0; + for (int t = 0; t < numTiles; t++) { + const int tiledRow = MATRIXTILESIZE * t + row; + const int tiledCol = MATRIXTILESIZE * t + col; + aTile[row][col] = accessorA[globalRow][tiledCol]; + bTile[row][col] = accessorB[tiledRow][globalCol]; + it.barrier(cl::sycl::access::fence_space::local_space); + for (k = 0; k < MATRIXTILESIZE; k++) { + // Perform computation ind[0] is row, ind[1] is col + acc += aTile[row][k] * bTile[k][col]; + } + it.barrier(cl::sycl::access::fence_space::local_space); + } + accessorC[globalRow][globalCol] = acc; + }); + }).wait_and_throw(); +} + + +void ParallelMultiply(int msize, TYPE a[][NUM], TYPE b[][NUM], TYPE c[][NUM], TYPE t[][NUM]) { + int NTHREADS = MAXTHREADS; + int MSIZE = NUM; + + MULTIPLY(MSIZE, NTHREADS, 0, a, b, c, t); +} diff --git a/Tools/Advisor/matrix_multiply_advisor/src/multiply.hpp b/Tools/Advisor/matrix_multiply_advisor/src/multiply.hpp new file mode 100644 index 0000000000..0a77257522 --- /dev/null +++ b/Tools/Advisor/matrix_multiply_advisor/src/multiply.hpp @@ -0,0 +1,48 @@ +//============================================================== +// Copyright 2019 Intel Corporation +// +// SPDX-License-Identifier: MIT +// 
============================================================= + +constexpr int MAXTHREADS=16; +constexpr int NUM=1024; +constexpr int MATRIXTILESIZE=16; +constexpr int WPT=8; + +#include +// exception handler +/* +The exception_list parameter is an iterable list of std::exception_ptr objects. +But those pointers are not always directly readable. +So, we rethrow the pointer, catch it, and then we have the exception itself. +Note: depending upon the operation there may be several exceptions. +*/ +auto exception_handler = [](cl::sycl::exception_list exceptionList) { + for (std::exception_ptr const& e : exceptionList) { + try { + std::rethrow_exception(e); + } catch (cl::sycl::exception const& e) { + std::terminate(); // exit the process immediately. + } + } +}; + +typedef float TYPE; +typedef TYPE Array[NUM]; + +// Select which multiply kernel to use via the following macro so that the +// kernel being used can be reported when the test is run. +#define MULTIPLY multiply1 + +extern void multiply1(int msize, int tidx, int numt, TYPE a[][NUM], + TYPE b[][NUM], TYPE c[][NUM], TYPE t[][NUM]); +extern void multiply1_1(int msize, int tidx, int numt, TYPE a[][NUM], + TYPE b[][NUM], TYPE c[][NUM], TYPE t[][NUM]); +extern void multiply1_2(int msize, int tidx, int numt, TYPE a[][NUM], + TYPE b[][NUM], TYPE c[][NUM], TYPE t[][NUM]); + +extern void ParallelMultiply(int msize, TYPE a[][NUM], TYPE b[][NUM], TYPE c[][NUM], TYPE t[][NUM]); + + + + From 47bc84cfa214bb64ba6a9abbe6a5bf5a63e3aa31 Mon Sep 17 00:00:00 2001 From: "kevin.p.oleary" Date: Fri, 21 Aug 2020 08:41:57 -0700 Subject: [PATCH 02/23] Adding VTune sample Signed-off-by: kevin.p.oleary --- Tools/VTuneProfiler/README.md | 9 + .../matrix_multiply_vtune/.gitkeep | 0 .../matrix_multiply_vtune/CMakeLists.txt | 6 + .../matrix_multiply_vtune/License.txt | 7 + .../matrix_multiply_vtune/README.md | 55 +++++ .../matrix_multiply_vtune/matrix_multiply.sln | 25 ++ .../matrix_multiply.vcxproj | 225 ++++++++++++++++++ 
.../matrix_multiply.vcxproj.filters | 36 +++ .../matrix_multiply.vcxproj.user | 4 + .../matrix_multiply_vtune/sample.json | 45 ++++ .../matrix_multiply_vtune/src/matrix.cpp | 102 ++++++++ .../matrix_multiply_vtune/src/multiply.cpp | 168 +++++++++++++ .../matrix_multiply_vtune/src/multiply.hpp | 48 ++++ 13 files changed, 730 insertions(+) create mode 100644 Tools/VTuneProfiler/README.md create mode 100644 Tools/VTuneProfiler/matrix_multiply_vtune/.gitkeep create mode 100644 Tools/VTuneProfiler/matrix_multiply_vtune/CMakeLists.txt create mode 100644 Tools/VTuneProfiler/matrix_multiply_vtune/License.txt create mode 100644 Tools/VTuneProfiler/matrix_multiply_vtune/README.md create mode 100644 Tools/VTuneProfiler/matrix_multiply_vtune/matrix_multiply.sln create mode 100644 Tools/VTuneProfiler/matrix_multiply_vtune/matrix_multiply.vcxproj create mode 100644 Tools/VTuneProfiler/matrix_multiply_vtune/matrix_multiply.vcxproj.filters create mode 100644 Tools/VTuneProfiler/matrix_multiply_vtune/matrix_multiply.vcxproj.user create mode 100644 Tools/VTuneProfiler/matrix_multiply_vtune/sample.json create mode 100644 Tools/VTuneProfiler/matrix_multiply_vtune/src/matrix.cpp create mode 100644 Tools/VTuneProfiler/matrix_multiply_vtune/src/multiply.cpp create mode 100644 Tools/VTuneProfiler/matrix_multiply_vtune/src/multiply.hpp diff --git a/Tools/VTuneProfiler/README.md b/Tools/VTuneProfiler/README.md new file mode 100644 index 0000000000..ea6e5a2f9a --- /dev/null +++ b/Tools/VTuneProfiler/README.md @@ -0,0 +1,9 @@ +# Code Samples of Intel(R) VTune(TM) Profiler + +| Code sample name | Supported Intel(r) Architecture(s) | Description +|:--- |:--- |:--- +| matrix_multiply | GPU, CPU | Simple matrix multiplication program + + +## License +The code samples are licensed under MIT license \ No newline at end of file diff --git a/Tools/VTuneProfiler/matrix_multiply_vtune/.gitkeep b/Tools/VTuneProfiler/matrix_multiply_vtune/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff 
--git a/Tools/VTuneProfiler/matrix_multiply_vtune/CMakeLists.txt b/Tools/VTuneProfiler/matrix_multiply_vtune/CMakeLists.txt new file mode 100644 index 0000000000..77f6fd607d --- /dev/null +++ b/Tools/VTuneProfiler/matrix_multiply_vtune/CMakeLists.txt @@ -0,0 +1,6 @@ +set(CMAKE_CXX_COMPILER dpcpp) +cmake_minimum_required(VERSION 3.0) +project(matrix_multiply) +set(CMAKE_CXX_FLAGS "-g -O3 -fsycl -Wno-write-strings -w -D_Linux") +add_executable(matrix.dpcpp src/matrix.cpp src/multiply.cpp) +add_custom_target(run ./matrix.dpcpp) diff --git a/Tools/VTuneProfiler/matrix_multiply_vtune/License.txt b/Tools/VTuneProfiler/matrix_multiply_vtune/License.txt new file mode 100644 index 0000000000..da5f7c1888 --- /dev/null +++ b/Tools/VTuneProfiler/matrix_multiply_vtune/License.txt @@ -0,0 +1,7 @@ +Copyright 2019 Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/Tools/VTuneProfiler/matrix_multiply_vtune/README.md b/Tools/VTuneProfiler/matrix_multiply_vtune/README.md new file mode 100644 index 0000000000..db52e3627f --- /dev/null +++ b/Tools/VTuneProfiler/matrix_multiply_vtune/README.md @@ -0,0 +1,55 @@ +# matrix multiply sample +A sample containing multiple implementations of matrix multiplication. This sample code is implemented using C++ and SYCL language for CPU and GPU. + +| Optimized for | Description +|:--- |:--- +| OS | Linux Ubuntu 18.04; Windows 10 +| Hardware | Kaby Lake with GEN9 or newer +| Software | Intel(R) oneAPI DPC++ Compiler beta; Intel(R) VTune(TM) Profiler +| What you will learn | How to profile an application using Intel(R) VTune(TM) Profiler +| Time to complete | 15 minutes + + + +## License +This code sample is licensed under the MIT license + +## How to Build + +This sample contains 3 versions of matrix multiplication using DPC++: + + multiply1 – basic implementation of matrix multiply using DPC++ + multiply1_1 – basic implementation that replaces the buffer store with a local variable “acc” to reduce memory traffic + multiply1_2 – basic implementation plus the local variable and matrix tiling + +Edit the line in multiply.hpp to select the version of the multiply function: +#define MULTIPLY multiply1 + + +### on Linux + To build DPC++ version: + cd + cmake . 
+ make + + Clean the program + make clean + +### on Windows - Visual Studio 2017 or newer + * Open Visual Studio 2017 + * Select Menu "File > Open > Project/Solution", find "matrix_multiply" folder and select "matrix_multiply.sln" + * Select Menu "Project > Build" to build the selected configuration + * Select Menu "Debug > Start Without Debugging" to run the program + +### on Windows - command line - Build the program using MSBuild + DPCPP Configurations: + Release - MSBuild matrix_multiply.sln /t:Rebuild /p:Configuration="Release" + Debug - MSBuild matrix_multiply.sln /t:Rebuild /p:Configuration="Debug" + + +## Running an Intel VTune Profiler analysis +------------------------------------------ + +vtune -collect gpu-hotspots -- ./matrix.dpcpp + + diff --git a/Tools/VTuneProfiler/matrix_multiply_vtune/matrix_multiply.sln b/Tools/VTuneProfiler/matrix_multiply_vtune/matrix_multiply.sln new file mode 100644 index 0000000000..51a22a5e49 --- /dev/null +++ b/Tools/VTuneProfiler/matrix_multiply_vtune/matrix_multiply.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 16 +VisualStudioVersion = 16.0.29209.62 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "matrix_multiply", "matrix_multiply.vcxproj", "{D209315E-99B1-47B6-9E4B-C922C022BE8C}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {D209315E-99B1-47B6-9E4B-C922C022BE8C}.Debug|x64.ActiveCfg = Debug|x64 + {D209315E-99B1-47B6-9E4B-C922C022BE8C}.Debug|x64.Build.0 = Debug|x64 + {D209315E-99B1-47B6-9E4B-C922C022BE8C}.Release|x64.ActiveCfg = Release|x64 + {D209315E-99B1-47B6-9E4B-C922C022BE8C}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + 
GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {6553E447-FADC-4836-9E68-26B86B68512B} + EndGlobalSection +EndGlobal diff --git a/Tools/VTuneProfiler/matrix_multiply_vtune/matrix_multiply.vcxproj b/Tools/VTuneProfiler/matrix_multiply_vtune/matrix_multiply.vcxproj new file mode 100644 index 0000000000..225398a593 --- /dev/null +++ b/Tools/VTuneProfiler/matrix_multiply_vtune/matrix_multiply.vcxproj @@ -0,0 +1,225 @@ + + + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {d209315e-99b1-47b6-9e4b-c922c022be8c} + Win32Proj + matrix_multiply + $(WindowsSDKVersion.Replace("\","")) + + + + Application + true + oneAPI Data Parallel C++ Compiler + Unicode + + + Application + true + oneAPI Data Parallel C++ Compiler + Unicode + + + Application + false + oneAPI Data Parallel C++ Compiler + true + Unicode + + + Application + false + oneAPI Data Parallel C++ Compiler + true + Unicode + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + + + + + + + + + + + + + + + + + + + + + + + + + true + + + true + + + true + + + false + + + false + + + false + + + + Use + Level3 + Disabled + true + true + pch.h + + + Console + true + + + + + Use + Level3 + Disabled + true + true + pch.h + + + Console + true + + + + + Use + Level3 + Disabled + true + true + pch.h + false + WIN32;DPCPP;%(PreprocessorDefinitions) + DisableAllWarnings + /std:c++17 %(AdditionalOptions) + + + Console + true + false + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + + + Console + true + true + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + + + Console + true + true + true + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + false + _UNICODE;UNICODE;%(PreprocessorDefinitions) + DisableAllWarnings + Default + %(AdditionalOptions) + + + Console + true + true + true + false + + + + + + + + + + + + + + + + + + + \ No newline at end of 
file diff --git a/Tools/VTuneProfiler/matrix_multiply_vtune/matrix_multiply.vcxproj.filters b/Tools/VTuneProfiler/matrix_multiply_vtune/matrix_multiply.vcxproj.filters new file mode 100644 index 0000000000..12c6e07af1 --- /dev/null +++ b/Tools/VTuneProfiler/matrix_multiply_vtune/matrix_multiply.vcxproj.filters @@ -0,0 +1,36 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;ipp;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + Source Files + + + + + + + + + + + Header Files + + + \ No newline at end of file diff --git a/Tools/VTuneProfiler/matrix_multiply_vtune/matrix_multiply.vcxproj.user b/Tools/VTuneProfiler/matrix_multiply_vtune/matrix_multiply.vcxproj.user new file mode 100644 index 0000000000..be25078707 --- /dev/null +++ b/Tools/VTuneProfiler/matrix_multiply_vtune/matrix_multiply.vcxproj.user @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/Tools/VTuneProfiler/matrix_multiply_vtune/sample.json b/Tools/VTuneProfiler/matrix_multiply_vtune/sample.json new file mode 100644 index 0000000000..866dcf9f85 --- /dev/null +++ b/Tools/VTuneProfiler/matrix_multiply_vtune/sample.json @@ -0,0 +1,45 @@ +{ +"guid":"D5D48B97-C29C-4386-A6D2-DB84D006D6A9" + "name": "Matrix Multiplication - Intel VTune Profiler", + "categories": ["Toolkit/Intel® oneAPI Base Toolkit/VTune Profiler"], + "description": "Simple program that shows how to improve the DPC++ Matrix Multiplication program using VTune Profiler and Advisor.", + "toolchain": ["dpcpp"], + "dependencies": ["vtune"], + "languages": [{"cpp": { }}], + "os": ["linux", "windows"], + "targetDevice": ["CPU", "GPU"], + "builder": ["ide", "cmake"] +}, +{ + "path": "VTuneProfiler/matrix_multiply_vtune", + "configurations": [ + { + "build": [ + "cmake .", + "make" + ], + "run": [ + 
"./matrix.dpcpp" + ], + "clean": [ + "make clean" + ] + } + ] + }, +{ + "path": "VTuneProfiler/matrix_multiply_vtune", + "configurations": [ + { + "build": [ + "MSBuild matrix_multiply.sln /t:Rebuild /p:Configuration=\"Release\"" + ], + "run": [ + "cd x64\\Release", + "matrix_multiply.exe" + ] + } + ] + } + + diff --git a/Tools/VTuneProfiler/matrix_multiply_vtune/src/matrix.cpp b/Tools/VTuneProfiler/matrix_multiply_vtune/src/matrix.cpp new file mode 100644 index 0000000000..52e27f3422 --- /dev/null +++ b/Tools/VTuneProfiler/matrix_multiply_vtune/src/matrix.cpp @@ -0,0 +1,102 @@ +//============================================================== +// Copyright 2019 Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= + +#include +#include +#include + +#include "multiply.hpp" + +typedef unsigned long long UINT64; +#define xstr(s) x_str(s) +#define x_str(s) #s + +using namespace std; + +// routine to initialize an array with data +void InitArr(TYPE row, TYPE col, TYPE off, TYPE a[][NUM]) { + int i, j; + + for (i = 0; i < NUM; i++) { + for (j = 0; j < NUM; j++) { + a[i][j] = row * i + col * j + off; + } + } +} + +// routine to print out contents of small arrays +void PrintArr(char *name, TYPE Array[][NUM]) { + int i, j; + + cout << "\n"< elapsed_seconds = end-start; + cout << "Elapsed Time: " << elapsed_seconds.count() << "s\n"; + + // free memory + free(buf1); + free(buf2); + free(buf3); + free(buf4); + +} diff --git a/Tools/VTuneProfiler/matrix_multiply_vtune/src/multiply.cpp b/Tools/VTuneProfiler/matrix_multiply_vtune/src/multiply.cpp new file mode 100644 index 0000000000..b00f6eaa09 --- /dev/null +++ b/Tools/VTuneProfiler/matrix_multiply_vtune/src/multiply.cpp @@ -0,0 +1,168 @@ +//============================================================== +// Copyright 2019 Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= + +// matrix multiply 
routines +#include "multiply.hpp" + +#include +#include + +using namespace cl::sycl; +using namespace std; + +constexpr cl::sycl::access::mode sycl_read = cl::sycl::access::mode::read; +constexpr cl::sycl::access::mode sycl_write = cl::sycl::access::mode::write; +constexpr cl::sycl::access::mode sycl_read_write = cl::sycl::access::mode::read_write; + +template +class Matrix1; + +template +class Matrix1_1; + +template +class Matrix1_2; + +// Basic matrix multiply +void multiply1(int msize, int tidx, int numt, TYPE a[][NUM], TYPE b[][NUM], + TYPE c[][NUM], TYPE t[][NUM]) { + int i, j, k; + + // Declare a deviceQueue + default_selector device; + queue q(device, exception_handler); + cout << "Running on " << q.get_device().get_info() << "\n"; + // Declare a 2 dimensional range + range<2> matrix_range{NUM, NUM}; + + // Declare 3 buffers and Initialize them + buffer bufferA((TYPE*)a, matrix_range); + buffer bufferB((TYPE*)b, matrix_range); + buffer bufferC((TYPE*)c, matrix_range); + // Submit our job to the queue + q.submit([&](cl::sycl::handler& h) { + // Declare 3 accessors to our buffers. 
The first 2 read and the last + // read_write + auto accessorA = bufferA.get_access(h); + auto accessorB = bufferB.get_access(h); + auto accessorC = bufferC.get_access(h); + + // Execute matrix multiply in parallel over our matrix_range + // ind is an index into this range + h.parallel_for >(matrix_range,[=](cl::sycl::id<2> ind) { + int k; + for (k = 0; k < NUM; k++) { + // Perform computation ind[0] is row, ind[1] is col + accessorC[ind[0]][ind[1]] += accessorA[ind[0]][k] * accessorB[k][ind[1]]; + } + }); + }).wait_and_throw(); +} + +// Replaces accessorC reference with a local variable +void multiply1_1(int msize, int tidx, int numt, TYPE a[][NUM], TYPE b[][NUM],TYPE c[][NUM], TYPE t[][NUM]) { + + int i, j, k; + + // Declare a deviceQueue + default_selector device; + queue q(device, exception_handler); + cout << "Running on " << q.get_device().get_info() << "\n"; + + // Declare a 2 dimensional range + range<2> matrix_range{NUM, NUM}; + + // Declare 3 buffers and Initialize them + buffer bufferA((TYPE*)a, matrix_range); + buffer bufferB((TYPE*)b, matrix_range); + buffer bufferC((TYPE*)c, matrix_range); + + // Submit our job to the queue + q.submit([&](cl::sycl::handler& h) { + // Declare 3 accessors to our buffers. 
The first 2 read and the last + // read_write + auto accessorA = bufferA.get_access(h); + auto accessorB = bufferB.get_access(h); + auto accessorC = bufferC.get_access(h); + + // Execute matrix multiply in parallel over our matrix_range + // ind is an index into this range + h.parallel_for>(matrix_range,[=](cl::sycl::id<2> ind) { + int k; + TYPE acc = 0.0; + for (k = 0; k < NUM; k++) { + // Perform computation ind[0] is row, ind[1] is col + acc += accessorA[ind[0]][k] * accessorB[k][ind[1]]; + } + accessorC[ind[0]][ind[1]] = acc; + }); + }).wait_and_throw(); +} + +// Replaces accessorC reference with a local variable and adds matrix tiling +void multiply1_2(int msize, int tidx, int numt, TYPE a[][NUM], TYPE b[][NUM], + TYPE c[][NUM], TYPE t[][NUM]) { + int i, j, k; + + // Declare a deviceQueue + default_selector device; + queue q(device, exception_handler); + cout << "Running on " << q.get_device().get_info() << "\n"; + + // Declare a 2 dimensional range + range<2> matrix_range{NUM, NUM}; + range<2> tile_range{MATRIXTILESIZE, MATRIXTILESIZE}; + + // Declare 3 buffers and Initialize them + buffer bufferA((TYPE*)a, matrix_range); + buffer bufferB((TYPE*)b, matrix_range); + buffer bufferC((TYPE*)c, matrix_range); + + // Submit our job to the queue + q.submit([&](cl::sycl::handler& h) { + // Declare 3 accessors to our buffers. 
The first 2 read and the last + // read_write + auto accessorA = bufferA.get_access(h); + auto accessorB = bufferB.get_access(h); + auto accessorC = bufferC.get_access(h); + + // Create matrix tiles + accessor aTile(cl::sycl::range<2>(MATRIXTILESIZE, MATRIXTILESIZE), h); + accessor bTile(cl::sycl::range<2>(MATRIXTILESIZE, MATRIXTILESIZE), h); + // Execute matrix multiply in parallel over our matrix_range + // ind is an index into this range + h.parallel_for>(cl::sycl::nd_range<2>(matrix_range,tile_range),[=](cl::sycl::nd_item<2> it) { + int k; + const int numTiles = NUM / MATRIXTILESIZE; + const int row = it.get_local_id(0); + const int col = it.get_local_id(1); + const int globalRow = MATRIXTILESIZE * it.get_group(0) + row; + const int globalCol = MATRIXTILESIZE * it.get_group(1) + col; + TYPE acc = 0.0; + for (int t = 0; t < numTiles; t++) { + const int tiledRow = MATRIXTILESIZE * t + row; + const int tiledCol = MATRIXTILESIZE * t + col; + aTile[row][col] = accessorA[globalRow][tiledCol]; + bTile[row][col] = accessorB[tiledRow][globalCol]; + it.barrier(cl::sycl::access::fence_space::local_space); + for (k = 0; k < MATRIXTILESIZE; k++) { + // Perform computation ind[0] is row, ind[1] is col + acc += aTile[row][k] * bTile[k][col]; + } + it.barrier(cl::sycl::access::fence_space::local_space); + } + accessorC[globalRow][globalCol] = acc; + }); + }).wait_and_throw(); +} + + +void ParallelMultiply(int msize, TYPE a[][NUM], TYPE b[][NUM], TYPE c[][NUM], TYPE t[][NUM]) { + int NTHREADS = MAXTHREADS; + int MSIZE = NUM; + + MULTIPLY(MSIZE, NTHREADS, 0, a, b, c, t); +} diff --git a/Tools/VTuneProfiler/matrix_multiply_vtune/src/multiply.hpp b/Tools/VTuneProfiler/matrix_multiply_vtune/src/multiply.hpp new file mode 100644 index 0000000000..0a77257522 --- /dev/null +++ b/Tools/VTuneProfiler/matrix_multiply_vtune/src/multiply.hpp @@ -0,0 +1,48 @@ +//============================================================== +// Copyright 2019 Intel Corporation +// +// 
SPDX-License-Identifier: MIT +// ============================================================= + +constexpr int MAXTHREADS=16; +constexpr int NUM=1024; +constexpr int MATRIXTILESIZE=16; +constexpr int WPT=8; + +#include +// exception handler +/* +The exception_list parameter is an iterable list of std::exception_ptr objects. +But those pointers are not always directly readable. +So, we rethrow the pointer, catch it, and then we have the exception itself. +Note: depending upon the operation there may be several exceptions. +*/ +auto exception_handler = [](cl::sycl::exception_list exceptionList) { + for (std::exception_ptr const& e : exceptionList) { + try { + std::rethrow_exception(e); + } catch (cl::sycl::exception const& e) { + std::terminate(); // exit the process immediately. + } + } +}; + +typedef float TYPE; +typedef TYPE Array[NUM]; + +// Select which multiply kernel to use via the following macro so that the +// kernel being used can be reported when the test is run. +#define MULTIPLY multiply1 + +extern void multiply1(int msize, int tidx, int numt, TYPE a[][NUM], + TYPE b[][NUM], TYPE c[][NUM], TYPE t[][NUM]); +extern void multiply1_1(int msize, int tidx, int numt, TYPE a[][NUM], + TYPE b[][NUM], TYPE c[][NUM], TYPE t[][NUM]); +extern void multiply1_2(int msize, int tidx, int numt, TYPE a[][NUM], + TYPE b[][NUM], TYPE c[][NUM], TYPE t[][NUM]); + +extern void ParallelMultiply(int msize, TYPE a[][NUM], TYPE b[][NUM], TYPE c[][NUM], TYPE t[][NUM]); + + + + From 36926f54b056b83137df43ae47b072212b42976b Mon Sep 17 00:00:00 2001 From: "kevin.p.oleary" Date: Fri, 21 Aug 2020 11:44:46 -0700 Subject: [PATCH 03/23] New styl --- .../Advisor_matrix_multiply_advisor_README.md | 25 ++++++++++++++++--- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/Tools/Advisor/matrix_multiply_advisor/Advisor_matrix_multiply_advisor_README.md b/Tools/Advisor/matrix_multiply_advisor/Advisor_matrix_multiply_advisor_README.md index 4bcdbcb0bb..71b784fc97 100755 --- 
a/Tools/Advisor/matrix_multiply_advisor/Advisor_matrix_multiply_advisor_README.md +++ b/Tools/Advisor/matrix_multiply_advisor/Advisor_matrix_multiply_advisor_README.md @@ -1,5 +1,5 @@ -# matrix multiply sample -A sample containing multiple implementations of matrix multiplication. This sample code is implemented using C++ and SYCL language for CPU and GPU. +# Matrix Multiply Sample +A sample containing multiple implementations of matrix multiplication. This sample code is implemented using DPC++ language for CPU and GPU. | Optimized for | Description |:--- |:--- @@ -26,7 +26,7 @@ Edit the line in multiply.h to select the version of the multiply function: #define MULTIPLY multiply1 -### on Linux +### On a Linux* System To build DPC++ version: cd cmake . @@ -35,7 +35,7 @@ Edit the line in multiply.h to select the version of the multiply function: Clean the program make clean -### on Windows - Visual Studio 2017 or newer +### On a Windows* System Using Visual Studio 2017 or newer * Open Visual Studio 2017 * Select Menu "File > Open > Project/Solution", find "matrix_multiply" folder and select "matrix_multiply.sln" * Select Menu "Project > Build" to build the selected configuration @@ -47,6 +47,23 @@ Edit the line in multiply.h to select the version of the multiply function: Debug - MSBuild matrix_multiply.sln /t:Rebuild /p:Configuration="Debug" +## Running the Sample + +# Example of Output + +./matrix.dpcpp +Address of buf1 = 0x7f5e687eb010 +Offset of buf1 = 0x7f5e687eb180 +Address of buf2 = 0x7f5e67fea010 +Offset of buf2 = 0x7f5e67fea1c0 +Address of buf3 = 0x7f5e677e9010 +Offset of buf3 = 0x7f5e677e9100 +Address of buf4 = 0x7f5e66fe8010 +Offset of buf4 = 0x7f5e66fe8140 +Using multiply kernel: multiply1 +Running on Intel(R) Gen9 +Elapsed Time: 0.539631s + ## Running an Intel Advisor analysis ------------------------------------------ From 096352db363bd1fd6f47dfc3b0b6d1c22a71d76f Mon Sep 17 00:00:00 2001 From: "kevin.p.oleary" Date: Fri, 21 Aug 2020 11:57:06 -0700 
Subject: [PATCH 04/23] New style --- .../Advisor_matrix_multiply_advisor_README.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/Tools/Advisor/matrix_multiply_advisor/Advisor_matrix_multiply_advisor_README.md b/Tools/Advisor/matrix_multiply_advisor/Advisor_matrix_multiply_advisor_README.md index 71b784fc97..8b08d385fb 100755 --- a/Tools/Advisor/matrix_multiply_advisor/Advisor_matrix_multiply_advisor_README.md +++ b/Tools/Advisor/matrix_multiply_advisor/Advisor_matrix_multiply_advisor_README.md @@ -9,7 +9,13 @@ A sample containing multiple implementations of matrix multiplication. This samp | What you will learn | How to profile an application using Intel(R) Advisor | Time to complete | 15 minutes - +## Purpose + +The Matrix Multiplication sample performs basic matrix multiplication. Three version are provided that use different features of DPC++. + +## Key Implementation details + +The basic DPC++ implementation explained in the code includes device selector, buffer, accessor, kernel, and command groups. ## License This code sample is licensed under MIT license @@ -49,7 +55,7 @@ Edit the line in multiply.h to select the version of the multiply function: ## Running the Sample -# Example of Output +### Example of Output ./matrix.dpcpp Address of buf1 = 0x7f5e687eb010 From 2cb99e95c3c8cf1a31b60389307b2bad8a21a119 Mon Sep 17 00:00:00 2001 From: "kevin.p.oleary" Date: Fri, 21 Aug 2020 12:01:10 -0700 Subject: [PATCH 05/23] New style --- .../Advisor/matrix_multiply_advisor/README.md | 33 ++++++++++++++++--- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/Tools/Advisor/matrix_multiply_advisor/README.md b/Tools/Advisor/matrix_multiply_advisor/README.md index 4bcdbcb0bb..8b08d385fb 100644 --- a/Tools/Advisor/matrix_multiply_advisor/README.md +++ b/Tools/Advisor/matrix_multiply_advisor/README.md @@ -1,5 +1,5 @@ -# matrix multiply sample -A sample containing multiple implementations of matrix multiplication. 
This sample code is implemented using C++ and SYCL language for CPU and GPU. +# Matrix Multiply Sample +A sample containing multiple implementations of matrix multiplication. This sample code is implemented using DPC++ language for CPU and GPU. | Optimized for | Description |:--- |:--- @@ -9,7 +9,13 @@ A sample containing multiple implementations of matrix multiplication. This samp | What you will learn | How to profile an application using Intel(R) Advisor | Time to complete | 15 minutes - +## Purpose + +The Matrix Multiplication sample performs basic matrix multiplication. Three version are provided that use different features of DPC++. + +## Key Implementation details + +The basic DPC++ implementation explained in the code includes device selector, buffer, accessor, kernel, and command groups. ## License This code sample is licensed under MIT license @@ -26,7 +32,7 @@ Edit the line in multiply.h to select the version of the multiply function: #define MULTIPLY multiply1 -### on Linux +### On a Linux* System To build DPC++ version: cd cmake . 
@@ -35,7 +41,7 @@ Edit the line in multiply.h to select the version of the multiply function: Clean the program make clean -### on Windows - Visual Studio 2017 or newer +### On a Windows* System Using Visual Studio 2017 or newer * Open Visual Studio 2017 * Select Menu "File > Open > Project/Solution", find "matrix_multiply" folder and select "matrix_multiply.sln" * Select Menu "Project > Build" to build the selected configuration @@ -47,6 +53,23 @@ Edit the line in multiply.h to select the version of the multiply function: Debug - MSBuild matrix_multiply.sln /t:Rebuild /p:Configuration="Debug" +## Running the Sample + +### Example of Output + +./matrix.dpcpp +Address of buf1 = 0x7f5e687eb010 +Offset of buf1 = 0x7f5e687eb180 +Address of buf2 = 0x7f5e67fea010 +Offset of buf2 = 0x7f5e67fea1c0 +Address of buf3 = 0x7f5e677e9010 +Offset of buf3 = 0x7f5e677e9100 +Address of buf4 = 0x7f5e66fe8010 +Offset of buf4 = 0x7f5e66fe8140 +Using multiply kernel: multiply1 +Running on Intel(R) Gen9 +Elapsed Time: 0.539631s + ## Running an Intel Advisor analysis ------------------------------------------ From 8a837c71accaf93ea9e0840f780f1058541b8852 Mon Sep 17 00:00:00 2001 From: "kevin.p.oleary" Date: Fri, 21 Aug 2020 12:15:57 -0700 Subject: [PATCH 06/23] Fix --- .../matrix_multiply_advisor/sample.json | 53 +++++++------------ 1 file changed, 20 insertions(+), 33 deletions(-) diff --git a/Tools/Advisor/matrix_multiply_advisor/sample.json b/Tools/Advisor/matrix_multiply_advisor/sample.json index 7777543652..c226bac3a7 100644 --- a/Tools/Advisor/matrix_multiply_advisor/sample.json +++ b/Tools/Advisor/matrix_multiply_advisor/sample.json @@ -1,5 +1,5 @@ { -i"guid":"6F00053A-11DC-40D1-90C7-1CFF822B874B", + "guid":"6F00053A-11DC-40D1-90C7-1CFF822B874B", "name": "Matrix Multiplication - Intel Advisor", "categories": ["Toolkit/Intel® oneAPI Base Toolkit/Advisor"], "description": "Simple program that shows how to improve the DPC++ Matrix Multiplication program using VTune Profiler and 
Advisor. ", @@ -8,37 +8,24 @@ i"guid":"6F00053A-11DC-40D1-90C7-1CFF822B874B", "languages": [{"cpp": { }}], "os": ["linux", "windows"], "targetDevice": ["CPU", "GPU"], - "builder": ["ide", "cmake"] + "builder": ["ide", "cmake"], + "ciTests": { + "linux": [{ + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make", + "make run" + ] + }], + "windows": [{ + "steps": [ + "MSBuild matrix_multiply.sln /t:Rebuild /p:Configuration=\"Release\"", + "cd x64/Release", + "matrix_multiply.exe" + ] + }] +} } -{ - "path": "Advisor/matrix_multiply_advisor", - "configurations": [ - { - "build": [ - "cmake .", - "make" - ], - "run": [ - "./matrix.dpcpp" - ], - "clean": [ - "make clean" - ] - } - ] - }, -{ - "path": "Advisor/matrix_multiply_advisor", - "configurations": [ - { - "build": [ - "MSBuild matrix_multiply.sln /t:Rebuild /p:Configuration=\"Release\"" - ], - "run": [ - "cd x64\\Release", - "matrix_multiply.exe" - ] - } - ] - } From 9862975127df00a529200ed33a495251b34cbd97 Mon Sep 17 00:00:00 2001 From: "kevin.p.oleary" Date: Fri, 21 Aug 2020 12:37:42 -0700 Subject: [PATCH 07/23] Remove chrono --- Tools/Advisor/matrix_multiply_advisor/src/matrix.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/Tools/Advisor/matrix_multiply_advisor/src/matrix.cpp b/Tools/Advisor/matrix_multiply_advisor/src/matrix.cpp index 52e27f3422..6653c33681 100644 --- a/Tools/Advisor/matrix_multiply_advisor/src/matrix.cpp +++ b/Tools/Advisor/matrix_multiply_advisor/src/matrix.cpp @@ -5,9 +5,9 @@ // ============================================================= #include -#include #include +#include "dpc_common.hpp" #include "multiply.hpp" typedef unsigned long long UINT64; @@ -86,12 +86,10 @@ int main() { cout << "Using multiply kernel: "<< xstr(MULTIPLY)<< "\n"; // start timing the matrix multiply code - auto start = std::chrono::steady_clock::now(); + dpc_common::TimeInterval matrix_time;; ParallelMultiply(NUM, a, b, c, t); - auto end = 
std::chrono::steady_clock::now(); - - std::chrono::duration elapsed_seconds = end-start; - cout << "Elapsed Time: " << elapsed_seconds.count() << "s\n"; + double matrix_elapsed = matrix_time.Elapsed(); + cout << "Elapsed Time: " << matrix_elapsed << "s\n"; // free memory free(buf1); From 4d86a157388ec1185980e3612e6fbad666fbe173 Mon Sep 17 00:00:00 2001 From: "kevin.p.oleary" Date: Fri, 21 Aug 2020 12:40:40 -0700 Subject: [PATCH 08/23] Remove date --- Tools/Advisor/matrix_multiply_advisor/License.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/Advisor/matrix_multiply_advisor/License.txt b/Tools/Advisor/matrix_multiply_advisor/License.txt index da5f7c1888..e63c6e13dc 100755 --- a/Tools/Advisor/matrix_multiply_advisor/License.txt +++ b/Tools/Advisor/matrix_multiply_advisor/License.txt @@ -1,4 +1,4 @@ -Copyright 2019 Intel Corporation +Copyright Intel Corporation Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: From cd3d7343efcaaacbaf0eaf675b249ca1fa1f2e34 Mon Sep 17 00:00:00 2001 From: "kevin.p.oleary" Date: Fri, 21 Aug 2020 12:44:13 -0700 Subject: [PATCH 09/23] Remove date --- Tools/VTuneProfiler/matrix_multiply_vtune/License.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/VTuneProfiler/matrix_multiply_vtune/License.txt b/Tools/VTuneProfiler/matrix_multiply_vtune/License.txt index da5f7c1888..e63c6e13dc 100644 --- a/Tools/VTuneProfiler/matrix_multiply_vtune/License.txt +++ b/Tools/VTuneProfiler/matrix_multiply_vtune/License.txt @@ -1,4 +1,4 @@ -Copyright 2019 Intel Corporation +Copyright Intel Corporation Permission is hereby granted, 
free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: From 407cf9ac46ec44f3933fecfcab4465d6d9b42971 Mon Sep 17 00:00:00 2001 From: "kevin.p.oleary" Date: Fri, 21 Aug 2020 12:48:06 -0700 Subject: [PATCH 10/23] Fix --- .../matrix_multiply_vtune/sample.json | 55 +++++++------------ 1 file changed, 20 insertions(+), 35 deletions(-) diff --git a/Tools/VTuneProfiler/matrix_multiply_vtune/sample.json b/Tools/VTuneProfiler/matrix_multiply_vtune/sample.json index 866dcf9f85..a00a831f0a 100644 --- a/Tools/VTuneProfiler/matrix_multiply_vtune/sample.json +++ b/Tools/VTuneProfiler/matrix_multiply_vtune/sample.json @@ -1,5 +1,5 @@ { -"guid":"D5D48B97-C29C-4386-A6D2-DB84D006D6A9" +"guid":"D5D48B97-C29C-4386-A6D2-DB84D006D6A9", "name": "Matrix Multiplication - Intel VTune Profiler", "categories": ["Toolkit/Intel® oneAPI Base Toolkit/VTune Profiler"], "description": "Simple program that shows how to improve the DPC++ Matrix Multiplication program using VTune Profiler and Advisor.", @@ -9,37 +9,22 @@ "os": ["linux", "windows"], "targetDevice": ["CPU", "GPU"], "builder": ["ide", "cmake"] -}, -{ - "path": "VTuneProfiler/matrix_multiply_vtune", - "configurations": [ - { - "build": [ - "cmake .", - "make" - ], - "run": [ - "./matrix.dpcpp" - ], - "clean": [ - "make clean" - ] - } - ] - }, -{ - "path": "VTuneProfiler/matrix_multiply_vtune", - "configurations": [ - { - "build": [ - "MSBuild matrix_multiply.sln /t:Rebuild /p:Configuration=\"Release\"" - ], - "run": [ - "cd x64\\Release", - "matrix_multiply.exe" - ] - } - ] - } - - + "ciTests": { + "linux": [{ + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make", + "make run" + ] + 
}], + "windows": [{ + "steps": [ + "MSBuild matrix_multiply.sln /t:Rebuild /p:Configuration=\"Release\"", + "cd x64/Release", + "matrix_multiply.exe" + ] + }] +} +} From ce9ad28f6ae11f08593fa3f4440cdd9ee75b62dd Mon Sep 17 00:00:00 2001 From: "kevin.p.oleary" Date: Fri, 21 Aug 2020 12:51:55 -0700 Subject: [PATCH 11/23] Remove chrono --- .../VTuneProfiler/matrix_multiply_vtune/src/matrix.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/Tools/VTuneProfiler/matrix_multiply_vtune/src/matrix.cpp b/Tools/VTuneProfiler/matrix_multiply_vtune/src/matrix.cpp index 52e27f3422..6653c33681 100644 --- a/Tools/VTuneProfiler/matrix_multiply_vtune/src/matrix.cpp +++ b/Tools/VTuneProfiler/matrix_multiply_vtune/src/matrix.cpp @@ -5,9 +5,9 @@ // ============================================================= #include -#include #include +#include "dpc_common.hpp" #include "multiply.hpp" typedef unsigned long long UINT64; @@ -86,12 +86,10 @@ int main() { cout << "Using multiply kernel: "<< xstr(MULTIPLY)<< "\n"; // start timing the matrix multiply code - auto start = std::chrono::steady_clock::now(); + dpc_common::TimeInterval matrix_time;; ParallelMultiply(NUM, a, b, c, t); - auto end = std::chrono::steady_clock::now(); - - std::chrono::duration elapsed_seconds = end-start; - cout << "Elapsed Time: " << elapsed_seconds.count() << "s\n"; + double matrix_elapsed = matrix_time.Elapsed(); + cout << "Elapsed Time: " << matrix_elapsed << "s\n"; // free memory free(buf1); From c5eedc8984311e804588cd5b6804dcbdd7e3baa6 Mon Sep 17 00:00:00 2001 From: "kevin.p.oleary" Date: Fri, 21 Aug 2020 12:59:55 -0700 Subject: [PATCH 12/23] New style --- .../matrix_multiply_vtune/README.md | 39 ++++++++++++++----- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/Tools/VTuneProfiler/matrix_multiply_vtune/README.md b/Tools/VTuneProfiler/matrix_multiply_vtune/README.md index db52e3627f..3d11f95b1a 100644 --- a/Tools/VTuneProfiler/matrix_multiply_vtune/README.md +++ 
b/Tools/VTuneProfiler/matrix_multiply_vtune/README.md @@ -1,15 +1,21 @@ -# matrix multiply sample -A sample containing multiple implementations of matrix multiplication. This sample code is implemented using C++ and SYCL language for CPU and GPU. +# Matrix Multiply Sample +A sample containing multiple implementations of matrix multiplication. This sample code is implemented using DPC++ language for CPU and GPU. | Optimized for | Description |:--- |:--- | OS | Linux Ubuntu 18.04; Windows 10 | Hardware | Kaby Lake with GEN9 or newer -| Software | Intel(R) oneAPI DPC++ Compiler beta; Intel(R) VTune(TM) Profiler +| Software | Intel(R) oneAPI DPC++ Compiler (beta); VTune(TM) Profiler | What you will learn | How to profile an application using Intel(R) VTune(TM) Profiler | Time to complete | 15 minutes - +## Purpose + +The Matrix Multiplication sample performs basic matrix multiplication. Three version are provided that use different features of DPC++. + +## Key Implementation details + +The basic DPC++ implementation explained in the code includes device selector, buffer, accessor, kernel, and command groups. ## License This code sample is licensed under MIT license @@ -26,7 +32,7 @@ Edit the line in multiply.h to select the version of the multiply function: #define MULTIPLY multiply1 -### on Linux +### On a Linux* System To build DPC++ version: cd cmake . 
@@ -35,21 +41,36 @@ Edit the line in multiply.h to select the version of the multiply function: Clean the program make clean -### on Windows - Visual Studio 2017 or newer +### On a Windows* System Using Visual Studio 2017 or newer * Open Visual Studio 2017 * Select Menu "File > Open > Project/Solution", find "matrix_multiply" folder and select "matrix_multiply.sln" * Select Menu "Project > Build" to build the selected configuration * Select Menu "Debug > Start Without Debugging" to run the program - + ### on Windows - command line - Build the program using MSBuild DPCPP Configurations: Release - MSBuild matrix_multiply.sln /t:Rebuild /p:Configuration="Release" Debug - MSBuild matrix_multiply.sln /t:Rebuild /p:Configuration="Debug" +## Running the Sample + +### Example of Output + +./matrix.dpcpp +Address of buf1 = 0x7f5e687eb010 +Offset of buf1 = 0x7f5e687eb180 +Address of buf2 = 0x7f5e67fea010 +Offset of buf2 = 0x7f5e67fea1c0 +Address of buf3 = 0x7f5e677e9010 +Offset of buf3 = 0x7f5e677e9100 +Address of buf4 = 0x7f5e66fe8010 +Offset of buf4 = 0x7f5e66fe8140 +Using multiply kernel: multiply1 +Running on Intel(R) Gen9 +Elapsed Time: 0.539631s + ## Running an Intel VTune Profiler analysis ------------------------------------------ vtune -collect gpu-hotspots -- ./matrix.dpcpp - - From eef2819be6c43260bcbc0356f51e836b9acc123a Mon Sep 17 00:00:00 2001 From: "kevin.p.oleary" Date: Mon, 24 Aug 2020 06:37:30 -0700 Subject: [PATCH 13/23] Fix --- .../matrix_multiply_vtune/README.md | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/Tools/VTuneProfiler/matrix_multiply_vtune/README.md b/Tools/VTuneProfiler/matrix_multiply_vtune/README.md index 3d11f95b1a..e375adf651 100644 --- a/Tools/VTuneProfiler/matrix_multiply_vtune/README.md +++ b/Tools/VTuneProfiler/matrix_multiply_vtune/README.md @@ -16,6 +16,8 @@ The Matrix Multiplication sample performs basic matrix multiplication. 
Three ver ## Key Implementation details The basic DPC++ implementation explained in the code includes device selector, buffer, accessor, kernel, and command groups. +Include Files +The include folder is located at %ONEAPI_ROOT%\dev-utilities\latest\include on your development system. ## License This code sample is licensed under MIT license @@ -60,13 +62,13 @@ Edit the line in multiply.h to select the version of the multiply function: ./matrix.dpcpp Address of buf1 = 0x7f5e687eb010 Offset of buf1 = 0x7f5e687eb180 -Address of buf2 = 0x7f5e67fea010 -Offset of buf2 = 0x7f5e67fea1c0 -Address of buf3 = 0x7f5e677e9010 -Offset of buf3 = 0x7f5e677e9100 -Address of buf4 = 0x7f5e66fe8010 -Offset of buf4 = 0x7f5e66fe8140 -Using multiply kernel: multiply1 +Address of buf2 = 0x7f5e67fea010 +Offset of buf2 = 0x7f5e67fea1c0 +Address of buf3 = 0x7f5e677e9010 +Offset of buf3 = 0x7f5e677e9100 +Address of buf4 = 0x7f5e66fe8010 +Offset of buf4 = 0x7f5e66fe8140 +Using multiply kernel: multiply1 Running on Intel(R) Gen9 Elapsed Time: 0.539631s From 1984acd4b8d9f76db293b9b19f76cbad6770912a Mon Sep 17 00:00:00 2001 From: "kevin.p.oleary" Date: Mon, 24 Aug 2020 06:43:45 -0700 Subject: [PATCH 14/23] Fix --- .../matrix_multiply_vtune/README.md | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/Tools/VTuneProfiler/matrix_multiply_vtune/README.md b/Tools/VTuneProfiler/matrix_multiply_vtune/README.md index e375adf651..e43ebd1b9d 100644 --- a/Tools/VTuneProfiler/matrix_multiply_vtune/README.md +++ b/Tools/VTuneProfiler/matrix_multiply_vtune/README.md @@ -59,18 +59,18 @@ Edit the line in multiply.h to select the version of the multiply function: ### Example of Output -./matrix.dpcpp -Address of buf1 = 0x7f5e687eb010 -Offset of buf1 = 0x7f5e687eb180 -Address of buf2 = 0x7f5e67fea010 -Offset of buf2 = 0x7f5e67fea1c0 -Address of buf3 = 0x7f5e677e9010 -Offset of buf3 = 0x7f5e677e9100 -Address of buf4 = 0x7f5e66fe8010 -Offset of buf4 = 0x7f5e66fe8140 -Using 
multiply kernel: multiply1 -Running on Intel(R) Gen9 -Elapsed Time: 0.539631s + ./matrix.dpcpp + Address of buf1 = 0x7f5e687eb010 + Offset of buf1 = 0x7f5e687eb180 + Address of buf2 = 0x7f5e67fea010 + Offset of buf2 = 0x7f5e67fea1c0 + Address of buf3 = 0x7f5e677e9010 + Offset of buf3 = 0x7f5e677e9100 + Address of buf4 = 0x7f5e66fe8010 + Offset of buf4 = 0x7f5e66fe8140 + Using multiply kernel: multiply1 + Running on Intel(R) Gen9 + Elapsed Time: 0.539631s ## Running an Intel VTune Profiler analysis ------------------------------------------ From ffaec997f33eddf473d12cf2ca59985b8bce3677 Mon Sep 17 00:00:00 2001 From: "kevin.p.oleary" Date: Mon, 24 Aug 2020 06:45:59 -0700 Subject: [PATCH 15/23] iFix --- Tools/VTuneProfiler/matrix_multiply_vtune/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/VTuneProfiler/matrix_multiply_vtune/README.md b/Tools/VTuneProfiler/matrix_multiply_vtune/README.md index e43ebd1b9d..b3f8504d92 100644 --- a/Tools/VTuneProfiler/matrix_multiply_vtune/README.md +++ b/Tools/VTuneProfiler/matrix_multiply_vtune/README.md @@ -57,7 +57,7 @@ Edit the line in multiply.h to select the version of the multiply function: ## Running the Sample -### Example of Output +## Example of Output ./matrix.dpcpp Address of buf1 = 0x7f5e687eb010 From 72f015866bef82a1c69443edbdef28b796331d6e Mon Sep 17 00:00:00 2001 From: "kevin.p.oleary" Date: Mon, 24 Aug 2020 06:53:42 -0700 Subject: [PATCH 16/23] iFixi --- Tools/VTuneProfiler/matrix_multiply_vtune/README.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/Tools/VTuneProfiler/matrix_multiply_vtune/README.md b/Tools/VTuneProfiler/matrix_multiply_vtune/README.md index b3f8504d92..8ca2359861 100644 --- a/Tools/VTuneProfiler/matrix_multiply_vtune/README.md +++ b/Tools/VTuneProfiler/matrix_multiply_vtune/README.md @@ -55,10 +55,7 @@ Edit the line in multiply.h to select the version of the multiply function: Debug - MSBuild matrix_multiply.sln /t:Rebuild /p:Configuration="Debug" -## 
Running the Sample - ## Example of Output - ./matrix.dpcpp Address of buf1 = 0x7f5e687eb010 Offset of buf1 = 0x7f5e687eb180 From b898b0beee35d0159ad1a97755350bb6fdd7aac1 Mon Sep 17 00:00:00 2001 From: "kevin.p.oleary" Date: Mon, 24 Aug 2020 06:57:33 -0700 Subject: [PATCH 17/23] iFix --- Tools/VTuneProfiler/matrix_multiply_vtune/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/VTuneProfiler/matrix_multiply_vtune/README.md b/Tools/VTuneProfiler/matrix_multiply_vtune/README.md index 8ca2359861..3b0401f6dc 100644 --- a/Tools/VTuneProfiler/matrix_multiply_vtune/README.md +++ b/Tools/VTuneProfiler/matrix_multiply_vtune/README.md @@ -58,7 +58,7 @@ Edit the line in multiply.h to select the version of the multiply function: ## Example of Output ./matrix.dpcpp Address of buf1 = 0x7f5e687eb010 - Offset of buf1 = 0x7f5e687eb180 + Offset of buf1 = 0x7f5e687eb010 Address of buf2 = 0x7f5e67fea010 Offset of buf2 = 0x7f5e67fea1c0 Address of buf3 = 0x7f5e677e9010 From 2f9fb0215b3d8ae4beba6e675a3d49f3f0f23a2b Mon Sep 17 00:00:00 2001 From: "kevin.p.oleary" Date: Mon, 24 Aug 2020 07:01:36 -0700 Subject: [PATCH 18/23] Fix --- Tools/VTuneProfiler/matrix_multiply_vtune/README.md | 8 -------- 1 file changed, 8 deletions(-) diff --git a/Tools/VTuneProfiler/matrix_multiply_vtune/README.md b/Tools/VTuneProfiler/matrix_multiply_vtune/README.md index 3b0401f6dc..1f5cd1abbc 100644 --- a/Tools/VTuneProfiler/matrix_multiply_vtune/README.md +++ b/Tools/VTuneProfiler/matrix_multiply_vtune/README.md @@ -57,14 +57,6 @@ Edit the line in multiply.h to select the version of the multiply function: ## Example of Output ./matrix.dpcpp - Address of buf1 = 0x7f5e687eb010 - Offset of buf1 = 0x7f5e687eb010 - Address of buf2 = 0x7f5e67fea010 - Offset of buf2 = 0x7f5e67fea1c0 - Address of buf3 = 0x7f5e677e9010 - Offset of buf3 = 0x7f5e677e9100 - Address of buf4 = 0x7f5e66fe8010 - Offset of buf4 = 0x7f5e66fe8140 Using multiply kernel: multiply1 Running on Intel(R) Gen9 Elapsed 
Time: 0.539631s From d8c8314aca61679c231664b922fa2fa81280e874 Mon Sep 17 00:00:00 2001 From: "kevin.p.oleary" Date: Mon, 24 Aug 2020 07:03:11 -0700 Subject: [PATCH 19/23] Fix --- Tools/VTuneProfiler/matrix_multiply_vtune/README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Tools/VTuneProfiler/matrix_multiply_vtune/README.md b/Tools/VTuneProfiler/matrix_multiply_vtune/README.md index 1f5cd1abbc..dc9232b191 100644 --- a/Tools/VTuneProfiler/matrix_multiply_vtune/README.md +++ b/Tools/VTuneProfiler/matrix_multiply_vtune/README.md @@ -57,8 +57,11 @@ Edit the line in multiply.h to select the version of the multiply function: ## Example of Output ./matrix.dpcpp + Using multiply kernel: multiply1 + Running on Intel(R) Gen9 + Elapsed Time: 0.539631s ## Running an Intel VTune Profiler analysis From 3837b2268f7106fbc84caf6a8d72a4c44dee7886 Mon Sep 17 00:00:00 2001 From: "kevin.p.oleary" Date: Mon, 24 Aug 2020 07:06:45 -0700 Subject: [PATCH 20/23] Fix --- .../Advisor/matrix_multiply_advisor/README.md | 21 +++++++------------ 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/Tools/Advisor/matrix_multiply_advisor/README.md b/Tools/Advisor/matrix_multiply_advisor/README.md index 8b08d385fb..d68cee206d 100644 --- a/Tools/Advisor/matrix_multiply_advisor/README.md +++ b/Tools/Advisor/matrix_multiply_advisor/README.md @@ -16,6 +16,7 @@ The Matrix Multiplication sample performs basic matrix multiplication. Three ver ## Key Implementation details The basic DPC++ implementation explained in the code includes device selector, buffer, accessor, kernel, and command groups. +The include folder is located at %ONEAPI_ROOT%\dev-utilities\latest\include on your development system. 
## License This code sample is licensed under MIT license @@ -53,22 +54,16 @@ Edit the line in multiply.h to select the version of the multiply function: Debug - MSBuild matrix_multiply.sln /t:Rebuild /p:Configuration="Debug" -## Running the Sample ### Example of Output -./matrix.dpcpp -Address of buf1 = 0x7f5e687eb010 -Offset of buf1 = 0x7f5e687eb180 -Address of buf2 = 0x7f5e67fea010 -Offset of buf2 = 0x7f5e67fea1c0 -Address of buf3 = 0x7f5e677e9010 -Offset of buf3 = 0x7f5e677e9100 -Address of buf4 = 0x7f5e66fe8010 -Offset of buf4 = 0x7f5e66fe8140 -Using multiply kernel: multiply1 -Running on Intel(R) Gen9 -Elapsed Time: 0.539631s + ./matrix.dpcpp + + Using multiply kernel: multiply1 + + Running on Intel(R) Gen9 + + Elapsed Time: 0.539631s ## Running an Intel Advisor analysis ------------------------------------------ From 21a83701886a00002d725325b9a7df8844ca8006 Mon Sep 17 00:00:00 2001 From: "kevin.p.oleary" Date: Mon, 24 Aug 2020 07:09:46 -0700 Subject: [PATCH 21/23] Add comment --- Tools/Advisor/matrix_multiply_advisor/src/matrix.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Tools/Advisor/matrix_multiply_advisor/src/matrix.cpp b/Tools/Advisor/matrix_multiply_advisor/src/matrix.cpp index 6653c33681..5914031d96 100644 --- a/Tools/Advisor/matrix_multiply_advisor/src/matrix.cpp +++ b/Tools/Advisor/matrix_multiply_advisor/src/matrix.cpp @@ -7,6 +7,8 @@ #include #include +// dpc_common.hpp can be found in the dev-utilities include folder. 
+// e.g., $ONEAPI_ROOT/dev-utilities//include/dpc_common.hpp #include "dpc_common.hpp" #include "multiply.hpp" From 8c1df1fc48e9f97d08c41fc371e2ef87d0e912e5 Mon Sep 17 00:00:00 2001 From: "kevin.p.oleary" Date: Mon, 24 Aug 2020 07:10:59 -0700 Subject: [PATCH 22/23] Add comment --- Tools/VTuneProfiler/matrix_multiply_vtune/src/matrix.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Tools/VTuneProfiler/matrix_multiply_vtune/src/matrix.cpp b/Tools/VTuneProfiler/matrix_multiply_vtune/src/matrix.cpp index 6653c33681..5914031d96 100644 --- a/Tools/VTuneProfiler/matrix_multiply_vtune/src/matrix.cpp +++ b/Tools/VTuneProfiler/matrix_multiply_vtune/src/matrix.cpp @@ -7,6 +7,8 @@ #include #include +// dpc_common.hpp can be found in the dev-utilities include folder. +// e.g., $ONEAPI_ROOT/dev-utilities//include/dpc_common.hpp #include "dpc_common.hpp" #include "multiply.hpp" From ae8c91ea2490e07d35a0b7c285e859bdd2d6a536 Mon Sep 17 00:00:00 2001 From: JoeOster <52936608+JoeOster@users.noreply.github.com> Date: Mon, 24 Aug 2020 09:52:18 -0700 Subject: [PATCH 23/23] Update sample.json --- Tools/VTuneProfiler/matrix_multiply_vtune/sample.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/VTuneProfiler/matrix_multiply_vtune/sample.json b/Tools/VTuneProfiler/matrix_multiply_vtune/sample.json index a00a831f0a..de7c762242 100644 --- a/Tools/VTuneProfiler/matrix_multiply_vtune/sample.json +++ b/Tools/VTuneProfiler/matrix_multiply_vtune/sample.json @@ -8,7 +8,7 @@ "languages": [{"cpp": { }}], "os": ["linux", "windows"], "targetDevice": ["CPU", "GPU"], - "builder": ["ide", "cmake"] + "builder": ["ide", "cmake"], "ciTests": { "linux": [{ "steps": [