diff --git a/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/README.md b/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/README.md index a99d5b006c..1124c82f7d 100644 --- a/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/README.md +++ b/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/README.md @@ -62,6 +62,7 @@ These intrinsics samples have relatively few modifiable parameters. However, cer ### Example of Output ``` Dot Product computed by C: 4324.000000 +Dot Product computed by C + SIMD: 4324.000000 Dot Product computed by Intel(R) SSE3 intrinsics: 4324.000000 Dot Product computed by Intel(R) AVX2 intrinsics: 4324.000000 Dot Product computed by Intel(R) AVX intrinsics: 4324.000000 diff --git a/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp b/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp index 7a353b8853..f4774140db 100644 --- a/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp +++ b/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp @@ -30,13 +30,17 @@ * */ #include +#include #include #include + #define SIZE 24 // assumes size is a multiple of 8 because // Intel(R) AVX registers will store 8, 32bit elements. 
// Computes dot product using C float dot_product(float *a, float *b); +// Computes dot product using an OpenMP SIMD reduction over SIZE floats +float dot_product_SIMD(float *a, float *b); // Computes dot product using Intel(R) SSE intrinsics float dot_product_intrin(float *a, float *b); // Computes dot product using Intel(R) AVX intrinsics @@ -59,9 +63,13 @@ int main() { a[i] = i; b[i] = i; } + product = dot_product(x, y); printf("Dot Product computed by C: %f\n", product); + product = dot_product_SIMD(x, y); + printf("Dot Product computed by C + SIMD: %f\n", product); + product = dot_product_intrin(x, y); printf("Dot Product computed by Intel(R) SSE3 intrinsics: %f\n", product); @@ -106,6 +114,16 @@ float dot_product(float *a, float *b) { return sum; } +float dot_product_SIMD(float *a, float *b) { + int i; + float sum = 0; +#pragma omp simd reduction(+ : sum) + for (i = 0; i < SIZE; i++) { + sum += a[i] * b[i]; + } + return sum; +} + // The Visual Studio* editor will show the following section as disabled as it // does not know that __INTEL_COMPILER is defined by the Intel(R) Compiler #if __INTEL_COMPILER @@ -193,7 +211,7 @@ float dot_product_intrin(float *a, float *b) { b + i); // loads unaligned array b into num2 num2= b[3] b[2] b[1] b[0] num3 = _mm_mul_ps(num1, num2); // performs multiplication num3 = - // a[3]*b[3] a[2]*b[2] a[1]*b[1] a[0]*b[0] + // a[3]*b[3] a[2]*b[2] a[1]*b[1] a[0]*b[0] num3 = _mm_hadd_ps(num3, num3); // performs horizontal addition // num3= a[3]*b[3]+ a[2]*b[2] a[1]*b[1]+a[0]*b[0] a[3]*b[3]+ a[2]*b[2] // a[1]*b[1]+a[0]*b[0]