Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ These intrinsics samples have relatively few modifiable parameters. However, cer
### Example of Output
```
Dot Product computed by C: 4324.000000
Dot Product computed by C + SIMD: 4324.000000
Dot Product computed by Intel(R) SSE3 intrinsics: 4324.000000
Dot Product computed by Intel(R) AVX2 intrinsics: 4324.000000
Dot Product computed by Intel(R) AVX intrinsics: 4324.000000
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,17 @@
*
*/
#include <immintrin.h>
#include <omp.h>
#include <pmmintrin.h>
#include <stdio.h>

#define SIZE 24 // Number of elements in each input vector. Must be a multiple
// of 8 because an Intel(R) AVX register holds eight 32-bit elements.

// Computes the dot product of a and b using a plain C loop
float dot_product(float *a, float *b);
// Computes the dot product of a and b using a C loop annotated with
// "#pragma omp simd"
float dot_product_SIMD(float *a, float *b);
// Computes the dot product of a and b using Intel(R) SSE3 intrinsics
float dot_product_intrin(float *a, float *b);
// Computes dot product using Intel(R) AVX intrinsics
Expand All @@ -59,9 +63,13 @@ int main() {
a[i] = i;
b[i] = i;
}

product = dot_product(x, y);
printf("Dot Product computed by C: %f\n", product);

product = dot_product_SIMD(x, y);
printf("Dot Product computed by C + SIMD: %f\n", product);

product = dot_product_intrin(x, y);
printf("Dot Product computed by Intel(R) SSE3 intrinsics: %f\n", product);

Expand Down Expand Up @@ -106,6 +114,16 @@ float dot_product(float *a, float *b) {
return sum;
}

// Computes the dot product of the first SIZE elements of a and b using a
// plain C loop annotated with an OpenMP SIMD reduction.
//
// a, b: input arrays; indices 0 .. SIZE-1 of each are read.
// Returns the sum of a[i] * b[i] over those indices.
float dot_product_SIMD(float *a, float *b) {
  int i;
  // The accumulator must be a float. An int accumulator would convert every
  // partial sum back to an integer, discarding the fractional part and
  // returning a wrong result for non-integral inputs.
  float sum = 0.0f;
#pragma omp simd reduction(+ : sum)
  for (i = 0; i < SIZE; i++) {
    sum += a[i] * b[i];
  }
  return sum;
}

// The Visual Studio* editor will show the following section as disabled as it
// does not know that __INTEL_COMPILER is defined by the Intel(R) Compiler
#if __INTEL_COMPILER
Expand Down Expand Up @@ -193,7 +211,7 @@ float dot_product_intrin(float *a, float *b) {
b +
i); // loads unaligned array b into num2 num2= b[3] b[2] b[1] b[0]
num3 = _mm_mul_ps(num1, num2); // performs multiplication num3 =
// a[3]*b[3] a[2]*b[2] a[1]*b[1] a[0]*b[0]
// a[3]*b[3] a[2]*b[2] a[1]*b[1] a[0]*b[0]
num3 = _mm_hadd_ps(num3, num3); // performs horizontal addition
// num3= a[3]*b[3]+ a[2]*b[2] a[1]*b[1]+a[0]*b[0] a[3]*b[3]+ a[2]*b[2]
// a[1]*b[1]+a[0]*b[0]
Expand Down