From 3aec3eb8851e3520a87f76b8668f6d36eb7a0926 Mon Sep 17 00:00:00 2001
From: Ethan Hirsch <ethan.hirsch@intel.com>
Date: Mon, 24 Aug 2020 14:14:05 -0700
Subject: [PATCH 1/9] added simd dot_prod implementation

Signed-off-by: Ethan Hirsch <ethan.hirsch@intel.com>
---
 .../Intrinsics/src/intrin_dot_sample.cpp        | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
diff --git a/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp b/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp
index 7a353b8853..1ac331caa6 100644
--- a/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp
+++ b/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp
@@ -32,11 +32,15 @@
 #include <immintrin.h>
 #include <pmmintrin.h>
 #include <stdio.h>
+#include <omp.h>
+
 #define SIZE 24  // assumes size is a multiple of 8 because
 // Intel(R) AVX registers will store 8, 32bit elements.
 
 // Computes dot product using C
 float dot_product(float *a, float *b);
+// Computes dot product using a C loop vectorized with #pragma omp simd
+float dot_product_SIMD(float *a, float *b);
 // Computes dot product using Intel(R) SSE intrinsics
 float dot_product_intrin(float *a, float *b);
 // Computes dot product using Intel(R) AVX intrinsics
@@ -62,6 +66,9 @@ int main() {
   product = dot_product(x, y);
   printf("Dot Product computed by C:  %f\n", product);
 
+  product = dot_product_SIMD(x, y);
+  printf("Dot Product computed by C + SIMD:  %f\n", product);
+
   product = dot_product_intrin(x, y);
   printf("Dot Product computed by Intel(R) SSE3 intrinsics:  %f\n", product);
 
@@ -106,6 +113,16 @@ float dot_product(float *a, float *b) {
   return sum;
 }
 
+float dot_product_SIMD(float *a, float *b) {
+  int i;
+  int sum = 0;
+  #pragma omp simd
+  for (i = 0; i < SIZE; i++) {
+    sum += a[i] * b[i];
+  }
+  return sum;
+}
+
 // The Visual Studio* editor will show the following section as disabled as it
 // does not know that __INTEL_COMPILER is defined by the Intel(R) Compiler
 #if __INTEL_COMPILER

From 2b816f40b5ada52e15a1b16ddf511340444ad1b5 Mon Sep 17 00:00:00 2001
From: Ethan Hirsch <ethan.hirsch@intel.com>
Date: Mon, 24 Aug 2020 14:22:55 -0700
Subject: [PATCH 2/9] adding timing

Signed-off-by: Ethan Hirsch <ethan.hirsch@intel.com>
---
 .../Intrinsics/src/intrin_dot_sample.cpp      | 25 ++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp b/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp
index 1ac331caa6..ca9df40779 100644
--- a/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp
+++ b/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp
@@ -33,6 +33,7 @@
 #include <pmmintrin.h>
 #include <stdio.h>
 #include <omp.h>
+#include <chrono>
 
 #define SIZE 24  // assumes size is a multiple of 8 because
 // Intel(R) AVX registers will store 8, 32bit elements.
@@ -51,20 +52,42 @@ short MMX_dot_product(short *a, short *b);
 
 #define MMX_DOT_PROD_ENABLED (__INTEL_COMPILER || (_MSC_VER && !_WIN64))
 
+// Object to allow for measuring computation time
+class TimeInterval {
+ public:
+  TimeInterval() : start_(std::chrono::steady_clock::now()) {}
+
+  double Elapsed() {
+    auto now = std::chrono::steady_clock::now();
+    return std::chrono::duration_cast<Duration>(now - start_).count();
+  }
+
+ private:
+  using Duration = std::chrono::duration<double>;
+  std::chrono::steady_clock::time_point start_;
+};
+
 int main() {
   float x[SIZE], y[SIZE];
   short a[SIZE], b[SIZE];
   int i;
   float product;
   short mmx_product;
+  double time;
   for (i = 0; i < SIZE; i++) {
     x[i] = i;
     y[i] = i;
     a[i] = i;
     b[i] = i;
   }
-  product = dot_product(x, y);
+
+  {
+    TimeInterval t;
+    product = dot_product(x, y);
+    time = t.Elapsed();
+  }
   printf("Dot Product computed by C:  %f\n", product);
+  printf("---Computation time:  %f\n", time);
 
   product = dot_product_SIMD(x, y);
   printf("Dot Product computed by C + SIMD:  %f\n", product);

From 3aef29e590be6b4db2df7643699eb8ac0f2d4c51 Mon Sep 17 00:00:00 2001
From: Ethan Hirsch <ethan.hirsch@intel.com>
Date: Mon, 24 Aug 2020 14:24:16 -0700
Subject: [PATCH 3/9] increase SIZE from 24 to 128

Signed-off-by: Ethan Hirsch <ethan.hirsch@intel.com>
---
 .../CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp b/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp
index ca9df40779..6a7901eedb 100644
--- a/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp
+++ b/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp
@@ -35,7 +35,7 @@
 #include <omp.h>
 #include <chrono>
 
-#define SIZE 24  // assumes size is a multiple of 8 because
+#define SIZE 128  // assumes size is a multiple of 8 because
 // Intel(R) AVX registers will store 8, 32bit elements.
 
 // Computes dot product using C

From 04a2bd93b151409c968f5c0266ababf5d61f2a41 Mon Sep 17 00:00:00 2001
From: Ethan Hirsch <ethan.hirsch@intel.com>
Date: Mon, 24 Aug 2020 14:28:41 -0700
Subject: [PATCH 4/9] removed timing code because it was not useful

Signed-off-by: Ethan Hirsch <ethan.hirsch@intel.com>
---
 .../Intrinsics/src/intrin_dot_sample.cpp      | 23 +------------------
 1 file changed, 1 insertion(+), 22 deletions(-)

diff --git a/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp b/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp
index 6a7901eedb..a0f9c9d2f8 100644
--- a/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp
+++ b/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp
@@ -52,28 +52,12 @@ short MMX_dot_product(short *a, short *b);
 
 #define MMX_DOT_PROD_ENABLED (__INTEL_COMPILER || (_MSC_VER && !_WIN64))
 
-// Object to allow for measuring computation time
-class TimeInterval {
- public:
-  TimeInterval() : start_(std::chrono::steady_clock::now()) {}
-
-  double Elapsed() {
-    auto now = std::chrono::steady_clock::now();
-    return std::chrono::duration_cast<Duration>(now - start_).count();
-  }
-
- private:
-  using Duration = std::chrono::duration<double>;
-  std::chrono::steady_clock::time_point start_;
-};
-
 int main() {
   float x[SIZE], y[SIZE];
   short a[SIZE], b[SIZE];
   int i;
   float product;
   short mmx_product;
-  double time;
   for (i = 0; i < SIZE; i++) {
     x[i] = i;
     y[i] = i;
@@ -81,13 +65,8 @@ int main() {
     b[i] = i;
   }
 
-  {
-    TimeInterval t;
-    product = dot_product(x, y);
-    time = t.Elapsed();
-  }
+  product = dot_product(x, y);
   printf("Dot Product computed by C:  %f\n", product);
-  printf("---Computation time:  %f\n", time);
 
   product = dot_product_SIMD(x, y);
   printf("Dot Product computed by C + SIMD:  %f\n", product);

From 745e53bd3656bb0be2d25d756088c1c413d51204 Mon Sep 17 00:00:00 2001
From: Ethan Hirsch <ethan.hirsch@intel.com>
Date: Mon, 24 Aug 2020 14:52:13 -0700
Subject: [PATCH 5/9] added reduction to simd

Signed-off-by: Ethan Hirsch <ethan.hirsch@intel.com>
---
 .../CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp b/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp
index a0f9c9d2f8..f8208d65e6 100644
--- a/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp
+++ b/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp
@@ -118,7 +118,7 @@ float dot_product(float *a, float *b) {
 float dot_product_SIMD(float *a, float *b) {
   int i;
   int sum = 0;
-  #pragma omp simd
+  #pragma omp simd reduction(+:sum)
   for (i = 0; i < SIZE; i++) {
     sum += a[i] * b[i];
   }

From b6206bc23f4e12ddfd46198983f7e85d0856a195 Mon Sep 17 00:00:00 2001
From: Ethan Hirsch <ethan.hirsch@intel.com>
Date: Mon, 24 Aug 2020 14:55:02 -0700
Subject: [PATCH 6/9] restore SIZE from 128 to 24

Signed-off-by: Ethan Hirsch <ethan.hirsch@intel.com>
---
 .../CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp b/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp
index f8208d65e6..c5e707c284 100644
--- a/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp
+++ b/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp
@@ -35,7 +35,7 @@
 #include <omp.h>
 #include <chrono>
 
-#define SIZE 128  // assumes size is a multiple of 8 because
+#define SIZE 24  // assumes size is a multiple of 8 because
 // Intel(R) AVX registers will store 8, 32bit elements.
 
 // Computes dot product using C

From 0022320dd9d5ab36ebd139e5e26527cb10bd63b3 Mon Sep 17 00:00:00 2001
From: Ethan Hirsch <ethan.hirsch@intel.com>
Date: Mon, 24 Aug 2020 14:56:33 -0700
Subject: [PATCH 7/9] updated sample output

Signed-off-by: Ethan Hirsch <ethan.hirsch@intel.com>
---
 .../C++/CompilerInfrastructure/Intrinsics/README.md              | 1 +
 1 file changed, 1 insertion(+)

diff --git a/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/README.md b/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/README.md
index a99d5b006c..1124c82f7d 100644
--- a/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/README.md
+++ b/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/README.md
@@ -62,6 +62,7 @@ These intrinsics samples have relatively few modifiable parameters. However, cer
 ### Example of Output
 ```
 Dot Product computed by C:  4324.000000
+Dot Product computed by C + SIMD:  4324.000000
 Dot Product computed by Intel(R) SSE3 intrinsics:  4324.000000
 Dot Product computed by Intel(R) AVX2 intrinsics:  4324.000000
 Dot Product computed by Intel(R) AVX intrinsics:  4324.000000

From 4ba1e5f45b596ca74df88d80a5dc221e9f8ed909 Mon Sep 17 00:00:00 2001
From: Ethan Hirsch <ethan.hirsch@intel.com>
Date: Mon, 24 Aug 2020 15:39:27 -0700
Subject: [PATCH 8/9] removed unused chrono include

Signed-off-by: Ethan Hirsch <ethan.hirsch@intel.com>
---
 .../CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp  | 1 -
 1 file changed, 1 deletion(-)

diff --git a/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp b/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp
index c5e707c284..441a5a530e 100644
--- a/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp
+++ b/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp
@@ -33,7 +33,6 @@
 #include <pmmintrin.h>
 #include <stdio.h>
 #include <omp.h>
-#include <chrono>
 
 #define SIZE 24  // assumes size is a multiple of 8 because
 // Intel(R) AVX registers will store 8, 32bit elements.

From 9a75101e5bd8a8537e289f02b37a70cebe3a7bf0 Mon Sep 17 00:00:00 2001
From: Ethan Hirsch <ethan.hirsch@intel.com>
Date: Tue, 25 Aug 2020 09:37:35 -0700
Subject: [PATCH 9/9] fixed formatting with clang-format

Signed-off-by: Ethan Hirsch <ethan.hirsch@intel.com>
---
 .../Intrinsics/src/intrin_dot_sample.cpp                    | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp b/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp
index 441a5a530e..f4774140db 100644
--- a/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp
+++ b/DirectProgramming/C++/CompilerInfrastructure/Intrinsics/src/intrin_dot_sample.cpp
@@ -30,9 +30,9 @@
  *
  */
 #include <immintrin.h>
+#include <omp.h>
 #include <pmmintrin.h>
 #include <stdio.h>
-#include <omp.h>
 
 #define SIZE 24  // assumes size is a multiple of 8 because
 // Intel(R) AVX registers will store 8, 32bit elements.
@@ -117,7 +117,7 @@ float dot_product(float *a, float *b) {
 float dot_product_SIMD(float *a, float *b) {
   int i;
   int sum = 0;
-  #pragma omp simd reduction(+:sum)
+#pragma omp simd reduction(+ : sum)
   for (i = 0; i < SIZE; i++) {
     sum += a[i] * b[i];
   }
@@ -211,7 +211,7 @@ float dot_product_intrin(float *a, float *b) {
         b +
         i);  // loads unaligned array b into num2  num2= b[3]   b[2]   b[1] b[0]
     num3 = _mm_mul_ps(num1, num2);  // performs multiplication   num3 =
-                                    // a[3]*b[3]  a[2]*b[2]  a[1]*b[1]  a[0]*b[0]
+                                    // a[3]*b[3]  a[2]*b[2]  a[1]*b[1] a[0]*b[0]
     num3 = _mm_hadd_ps(num3, num3);  // performs horizontal addition
     // num3=  a[3]*b[3]+ a[2]*b[2]  a[1]*b[1]+a[0]*b[0]  a[3]*b[3]+ a[2]*b[2]
     // a[1]*b[1]+a[0]*b[0]