
Commit 8d667ab

Merge remote-tracking branch 'upstream/master'
2 parents: cabfbe1 + 649a675

10 files changed (+38, −40 lines)

AI-and-Analytics/Features-and-Functionality/IntelTensorFlow_Horovod_Multinode_Training/README.md

Lines changed: 14 additions & 18 deletions
@@ -42,41 +42,37 @@ TensorFlow is ready for use once you finish the Intel AI Analytics Toolkit insta
 You can refer to the oneAPI [main page](https://software.intel.com/en-us/oneapi) for toolkit installation and the Toolkit [Getting Started Guide for Linux](https://software.intel.com/en-us/get-started-with-intel-oneapi-linux-get-started-with-the-intel-ai-analytics-toolkit) for post-installation steps and scripts.
 
 
-### On a Linux* System
-#### Activate conda environment With Root Access
+### Sourcing the oneAPI AI Analytics Toolkit environment variables
 
-Navigate in Linux shell to your oneapi installation path, typically `/opt/intel/oneapi`. Activate the conda environment with the following command:
+By default, the Intel AI Analytics Toolkit is installed in the `/opt/intel/oneapi` folder. Load the toolkit by sourcing the `setvars.sh` script in a Linux shell. Note the `--ccl-configuration=cpu_icc` flag: the default `ccl-configuration` is `cpu_gpu_dpcpp`, but because this sample distributes the TensorFlow workload across multiple CPU nodes, Horovod is configured to use CPUs.
 
 
 ```
-source /opt/intel/oneapi/setvars.sh
-source activate tensorflow
+source /opt/intel/oneapi/setvars.sh --ccl-configuration=cpu_icc
 ```
 
-#### Activate conda environment Without Root Access (Optional)
+### Creating a TensorFlow environment with Horovod
 
-By default, the Intel AI Analytics toolkit is installed in the `/opt/intel/oneapi` folder, which requires root privileges to manage it. If you would like to bypass using root access to manage your conda environment, then you can clone your desired conda environment using the following command:
+Next, create a conda environment with Intel-optimized TensorFlow and Horovod installed. Execute the following commands:
 
 
 ```
-conda create --name user_tensorflow --clone tensorflow
+conda create --name tensorflow_horovod
+conda activate tensorflow_horovod
 ```
 
-Then activate your conda environment with the following command:
+Find the path where the `tensorflow_horovod` conda environment was created, then install the required packages into it:
 
 
 ```
-source activate user_tensorflow
+conda install -c "/opt/intel/oneapi/conda_channel" -p <path_of_tensorflow_horovod_env>/tensorflow_horovod -y -q conda python=3.7 numpy intel-openmp tensorflow --offline
 ```
 
-## Running the Sample
-
-Before running the sample, you will need to install the 3rd-party [Horovod](https://github.com/horovod/horovod) framework.
-
-After you have activated your conda environment, you may wish to execute the following commands to install `horovod`:
+Before running the sample, you will need to install the third-party [Horovod](https://github.com/horovod/horovod) framework. Install Horovod with the following command:
 ```
-export HOROVOD_WITHOUT_MPI=1 #Optional, in case you encounter MPI-related install issues
-pip install horovod
+env HOROVOD_WITHOUT_MPI=1 HOROVOD_CPU_OPERATIONS=CCL HOROVOD_WITHOUT_MXNET=1 HOROVOD_WITHOUT_PYTORCH=1 HOROVOD_WITH_TENSORFLOW=1 python -m pip install --upgrade --force-reinstall --no-cache-dir horovod
 ```
 
-To the script on one machine without invoking Horovod, type the following command in the terminal with Python installed:
+## Running the Sample
+
+To execute the script on one machine without invoking Horovod, type the following command in the terminal with Python installed:
 ```
 python TensorFlow_Multinode_Training_with_Horovod.py
```
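For context on what gets launched here, a training script in this setup typically follows the standard Horovod-with-Keras pattern sketched below. This is a minimal sketch under stated assumptions, not the actual contents of `TensorFlow_Multinode_Training_with_Horovod.py`; the model and the random placeholder data are illustrative only.

```python
# A minimal sketch of the usual Horovod + TensorFlow 2 (Keras) training pattern.
# NOT the contents of TensorFlow_Multinode_Training_with_Horovod.py; the model
# and the random data below are placeholders for illustration only.
import numpy as np
import tensorflow as tf
import horovod.tensorflow.keras as hvd

hvd.init()  # one Horovod process per rank

# Placeholder data standing in for a real dataset.
x_train = np.random.rand(1024, 784).astype("float32")
y_train = np.random.randint(0, 10, size=(1024,)).astype("int64")

model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation="relu", input_shape=(784,)),
    tf.keras.layers.Dense(10, activation="softmax"),
])

# Common Horovod convention: scale the learning rate by the number of workers,
# then wrap the optimizer so gradients are allreduced across ranks.
opt = hvd.DistributedOptimizer(tf.keras.optimizers.SGD(0.01 * hvd.size()))
model.compile(optimizer=opt, loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])

callbacks = [
    # Broadcast initial weights from rank 0 so all workers start identically.
    hvd.callbacks.BroadcastGlobalVariablesCallback(0),
]

# Let only rank 0 print progress to avoid duplicated output from every worker.
model.fit(x_train, y_train, batch_size=64, epochs=1,
          callbacks=callbacks, verbose=1 if hvd.rank() == 0 else 0)
```

The CI entry in `sample.json` (next file) launches two ranks with `horovodrun -np 2 python TensorFlow_Multinode_Training_with_Horovod.py`; spreading the ranks across several machines additionally requires giving `horovodrun` a host list.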

AI-and-Analytics/Features-and-Functionality/IntelTensorFlow_Horovod_Multinode_Training/sample.json

Lines changed: 3 additions & 4 deletions
@@ -9,11 +9,10 @@
   "ciTests": {
     "linux": [
       {
-        "id": "tensorflow horovod",
+        "env": ["source /opt/intel/oneapi/setvars.sh --ccl-configuration=cpu_icc --force", "conda create -n horovod_test -c intel -c conda python=3.7 numpy intel-openmp tensorflow=2.3.0", "source activate horovod_test"],
+        "id": "tensorflow horovod",
         "steps": [
-          "source activate tensorflow",
-          "export HOROVOD_WITHOUT_MPI=1",
-          "pip install horovod",
+          "env HOROVOD_WITHOUT_MPI=1 HOROVOD_CPU_OPERATIONS=CCL HOROVOD_WITHOUT_MXNET=1 HOROVOD_WITHOUT_PYTORCH=1 HOROVOD_WITH_TENSORFLOW=1 python -m pip install --upgrade --force-reinstall --no-cache-dir horovod",
           "horovodrun -np 2 python TensorFlow_Multinode_Training_with_Horovod.py"
         ]
       }

AI-and-Analytics/Features-and-Functionality/IntelTensorFlow_PerformanceAnalysis/README.md

Lines changed: 11 additions & 10 deletions
@@ -55,7 +55,7 @@ Third party program Licenses can be found here: [third-party-programs.txt](https
 
 1. Create conda env: `$conda create -n stock-tensorflow python matplotlib ipykernel psutil pandas gitpython`
 2. Activate the created conda env: `$source activate stock-tensorflow.`
-3. Install stock Tensorflow with a specific version: `(stock-tensorflow) $pip install tensorflow==2.2.0`
+3. Install stock Tensorflow with a specific version: `(stock-tensorflow) $pip install tensorflow==2.3.0`
 4. Install extra needed package: `(stock-tensorflow) $pip install cxxfilt`
 5. Deactivate conda env: `(stock-tensorflow)$conda deactivate`
 6. Register the kernel to Jupyter NB: `$~/.conda/envs/stock-tensorflow/bin/python -m ipykernel install --user --name=stock-tensorflow`
@@ -68,7 +68,7 @@ Third party program Licenses can be found here: [third-party-programs.txt](https
 > NOTE: Intel-optimized Tensorflow is on DevCloud. However, users don't have access to install extra packages.
 Therefore, we need to clone Intel Tensorflow into the user's home directory for installing extra packages.
 
-1. Source oneAPI environment variables: `$source /opt/intel/inteloneapi/setvars.sh`
+1. Source oneAPI environment variables: `$source /opt/intel/oneapi/setvars.sh`
 2. Create conda env: `$conda create --name intel-tensorflow --clone tensorflow`
 3. Activate the created conda env: `$source activate intel-tensorflow`
 4 Install the extra needed package: `(intel-tensorflow) $pip install cxxfilt matplotlib ipykernel psutil pandas gitpython`
@@ -85,7 +85,7 @@ Third party program Licenses can be found here: [third-party-programs.txt](https
 1. Create conda env: `$conda create -n stock-tensorflow python matplotlib ipykernel psutil pandas gitpython`
 
 2. Activate the created conda env: `$conda activate stock-tensorflow`
-3. Install stock tensorflow with a specific version: `(stock-tensorflow) $pip install tensorflow==2.2.0`
+3. Install stock tensorflow with a specific version: `(stock-tensorflow) $pip install tensorflow==2.3.0`
 4. Install extra needed package: `(stock-tensorflow) $pip install cxxfilt`
 5. Deactivate conda env: `(stock-tensorflow)$conda deactivate`
 6. Register the kernel to Jupyter NB: `$~/anaconda3/envs/stock-tensorflow/bin/python -m ipykernel install --user --name=stock-tensorflow`
@@ -108,17 +108,18 @@ Third party program Licenses can be found here: [third-party-programs.txt](https
 
 ### Running the Sample
 
-1. Copy the Intel Model Zoo from your AI Analytics Toolkit installation path: `$cp -rf /opt/intel/inteloneapi/modelzoo/latest/models ~/`
-2. Launch Jupyter notebook: `$jupyter notebook --ip=0.0.0.0`
+1. Copy the Intel Model Zoo from your AI Analytics Toolkit installation path: `$cp -rf /opt/intel/oneapi/modelzoo/latest/models ~/`
+2. Initialize a git repository in the copied folder: `$cd ~/models; git init; git add .; git commit -m 'initial commit'`
+3. Launch Jupyter notebook: `$jupyter notebook --ip=0.0.0.0`
 
 > NOTE: Users don't need to apply step 2 on DevCloud Environment.
 
-3. Follow the instructions to open the URL with the token in your browser
-4. Browse to the `models/docs/notebooks/perf_analysis` folder
-5. Click the `benchmark_perf_comparison.ipynb` or `benchmark_perf_timeline_analysis.ipynb` file
-6. Change your Jupyter notebook kernel to either "stock-tensorflow" or "intel-tensorflow" (highlighted in the diagram below)
+4. Follow the instructions to open the URL with the token in your browser
+5. Browse to the `models/docs/notebooks/perf_analysis` folder
+6. Click the `benchmark_perf_comparison.ipynb` or `benchmark_perf_timeline_analysis.ipynb` file
+7. Change your Jupyter notebook kernel to either "stock-tensorflow" or "intel-tensorflow" (highlighted in the diagram below)
 <br><img src="images/jupyter_kernels.png" width="300" height="300"><br>
-7. Run through every cell of the notebook one by one
+8. Run through every cell of the notebook one by one
 
 > NOTE: To compare stock and Intel-optimized TF results in the section "Analyze TF Timeline results among Stock and Intel Tensorflow," users need to run all cells before the comparison section with both stock-tensorflow and intel-tensorflow kernels.

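Because the same notebooks are run once under the `stock-tensorflow` kernel and once under `intel-tensorflow`, it helps to confirm which environment a notebook cell is actually using before benchmarking. A minimal sanity-check sketch (not part of the sample; it only assumes the kernel names registered in the steps above):

```python
# Run in a notebook cell to confirm which conda environment and TensorFlow
# build the selected Jupyter kernel is using before benchmarking.
import sys
import tensorflow as tf

print("Python executable :", sys.executable)   # should point into the stock-tensorflow
                                               # or intel-tensorflow conda environment
print("TensorFlow version:", tf.__version__)   # 2.3.0 is expected per the steps above
print("Inter-op threads  :", tf.config.threading.get_inter_op_parallelism_threads())
print("Intra-op threads  :", tf.config.threading.get_intra_op_parallelism_threads())
```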
DirectProgramming/DPC++/Jupyter/oneapi-essentials-training/07_DPCPP_Library/lab/maximum_function.cpp

Lines changed: 2 additions & 0 deletions
@@ -3,7 +3,9 @@
 //
 // SPDX-License-Identifier: MIT
 // =============================================================
+#include <oneapi/dpl/numeric>
 #include <oneapi/dpl/algorithm>
+#include <oneapi/dpl/functional>
 #include <oneapi/dpl/execution>
 #include <oneapi/dpl/iterator>
 
DirectProgramming/DPC++/Jupyter/oneapi-essentials-training/07_DPCPP_Library/lab/minimum_function.cpp

Lines changed: 2 additions & 0 deletions
@@ -3,7 +3,9 @@
 //
 // SPDX-License-Identifier: MIT
 // =============================================================
+#include <oneapi/dpl/numeric>
 #include <oneapi/dpl/algorithm>
+#include <oneapi/dpl/functional>
 #include <oneapi/dpl/execution>
 #include <oneapi/dpl/iterator>
 
DirectProgramming/DPC++/Jupyter/oneapi-essentials-training/07_DPCPP_Library/lab/transform_iterator.cpp

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@
 //
 // SPDX-License-Identifier: MIT
 // =============================================================
-#include <oneapi/dpl/algorithm>
+#include <oneapi/dpl/numeric>
 #include <oneapi/dpl/execution>
 #include <oneapi/dpl/iterator>
 
DirectProgramming/DPC++FPGA/ReferenceDesigns/db/README.md

Lines changed: 1 addition & 6 deletions
@@ -249,7 +249,7 @@ You should see the following output in the console:
 ### Database files
 In the `data/` directory, you will find database files for a scale factor of 0.01. These files were generated manually and can be used to verify the queries in emulation. However, **these files are too small to showcase the true performance of the FPGA hardware**.
 
-To generate larger database files to run on the hardware, you can use TPC's `dbgen` tool. Instructions for downloading, building and running the `dbgen` tool can be found in the [TPC-H documents](http://www.tpc.org/tpc_documents_current_versions/pdf/tpc-h_v2.18.0.pdf) on the [TPC-H website](http://www.tpc.org/tpch/). Note that this reference design currently only supports TPC-H databases with scale factors of 0.01 or 1.
+To generate larger database files to run on the hardware, you can use TPC's `dbgen` tool. Instructions for downloading, building and running the `dbgen` tool can be found on the [TPC-H website](http://www.tpc.org/tpch/). Note that this reference design currently only supports databases with scale factors of 0.01 or 1.
 
 ### Query Implementation
 The following sections will describe, at a high level, how queries 1, 9, 11 and 12 are implemented on the FPGA using a set of generalized database operators (found in `db_utils/`). In the block diagrams below, the blocks are oneAPI kernels, and the arrows represent `pipes` that shows the flow of data from one kernel to another.
@@ -271,10 +271,5 @@ Query 11 showcases the `MapJoin` and `FifoSort` database operators. The block di
 Query 12 showcases the `MergeJoin` database operator. The block diagram of the design is shown below.
 
 ![](q12.png)
-
-## References
-[Khronous SYCL Resources](https://www.khronos.org/sycl/resources) </br>
-[TPC Website](http://www.tpc.org/tpch/) </br>
-[TPC-H Document](http://www.tpc.org/tpc_documents_current_versions/pdf/tpc-h_v2.18.0.pdf) </br>
 
 

DirectProgramming/DPC++FPGA/ReferenceDesigns/qrd/src/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -52,7 +52,7 @@ else()
 message(STATUS "\tAn invalid board name was passed in using the FPGA_BOARD flag. Configuring the design to run on the Intel(R) Programmable Acceleration Card (PAC) with Intel Arria(R) 10 GX FPGA. Please refer to the README for the list of valid board names.")
 endif()
 
-set(HARDWARE_COMPILE_FLAGS -fintelfpga -c -DFIXED_ITERATIONS=${FIXED_ITERATIONS} -DROWS_COMPONENT=${ROWS_COMPONENT} -DCOLS_COMPONENT=${COLS_COMPONENT})
+set(HARDWARE_COMPILE_FLAGS -fintelfpga -c -fno-fast-math -DFIXED_ITERATIONS=${FIXED_ITERATIONS} -DROWS_COMPONENT=${ROWS_COMPONENT} -DCOLS_COMPONENT=${COLS_COMPONENT})
 
 # use cmake -D USER_HARDWARE_FLAGS=<flags> to set extra flags for FPGA backend compilation
 separate_arguments(USER_HARDWARE_FLAGS)
Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
+to delete - moved to new folder structure
+
Libraries/oneCCL/oneCCL_Getting_Started/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -12,6 +12,7 @@ if("$ENV{EXAMPLE_ROOT}" STREQUAL "")
 file(COPY $ENV{CCL_ROOT}/examples/cpu DESTINATION src)
 file(COPY $ENV{CCL_ROOT}/examples/common DESTINATION src)
 file(COPY $ENV{CCL_ROOT}/examples/benchmark DESTINATION src)
+file(COPY $ENV{CCL_ROOT}/examples/external_launcher DESTINATION src)
 file(COPY $ENV{CCL_ROOT}/examples/include DESTINATION src)
 file(COPY $ENV{CCL_ROOT}/examples/CMakeLists.txt DESTINATION src)
 add_subdirectory (${PROJECT_BINARY_DIR}/src out)
