Skip to content

Commit 691d0b8

Browse files
kaikaiyaomengfei25
and authored
Neural Coder enable device detection and device compatibility analysis (#1461)
* Create device.py * Update globals.py * Update interface.py * Update device.py * Update device.py * Create pytorch_mixed_precision_intel_gpu.yaml * Update device.py * Update device.py * Update device.py * Update device.py * Update interface.py * Update device.py * Update interface.py * Update globals.py * fix bugs * add code device compatibility analysis * add code device compatibility analysis * Update device.py Co-authored-by: mengfeil <[email protected]>
1 parent f4aeb5d commit 691d0b8

File tree

4 files changed

+155
-4
lines changed

4 files changed

+155
-4
lines changed
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Copyright (c) 2022 Intel Corporation
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# Transformation recipe: enable PyTorch mixed precision (fp16 autocast) on
# Intel GPU (XPU).  Inserts the import and the autocast context manager above
# the detected inference line, then indents the inference line into the
# `with` block.
transformation:
  location:
    - insert_above_inference_line
    - indent_inference_line
  content:
    - |-
      [+] import torch
      [+] with torch.xpu.amp.autocast(dtype=torch.half):
    # presumably the indent level (in units) for indent_inference_line —
    # TODO(review): confirm against the other backend recipes
    - 1
  order:
    - below:
      above:
    - below:
      above:

neural_coder/globals.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,17 @@
3535
# load transformers class def by a cache file instead of on-the-fly catch
3636
cache_load_transformers = True
3737

38+
# Detected execution device; refreshed at runtime by utils.device.detect_device().
# Observed values: "cpu_with_amx", "cpu_without_amx", "intel_gpu", "cuda".
device = "cpu_with_amx"

# device compatibility of the code: e.g. ["cpu", "cuda"], ["cuda"]
# (populated/deduplicated by utils.device.detect_code_device_compatibility)
list_code_device_compatibility = ["cuda"]

# quantization config for HuggingFace optimum-intel optimizations
# it is either "" (None) or "xxx" (a string of config path)
optimum_quant_config = ""
4147

48+
4249
def reset_globals():
4350
global list_code_path
4451

neural_coder/interface.py

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,23 @@
2424
os.makedirs("neural_coder_workspace")
2525

2626

27+
def detect_device_(logger):
    """Detect the local execution device and log a human-readable name.

    Delegates the actual probing to ``utils.device.detect_device``, which
    stores its verdict in ``globals.device``; this wrapper only logs it.

    Args:
        logger: ``logging.Logger`` used to report progress and the result.
    """
    # device detection
    logger.info("Device detection started ...")
    from .utils.device import detect_device
    detect_device()
    # Map the internal device tag to a display name for the log.
    display_names = {
        "cpu_with_amx": "CPU with AMX",
        "cpu_without_amx": "CPU without AMX",
        "intel_gpu": "Intel(R) GPU",
        "cuda": "CUDA",
        # BUGFIX: was misspelled "mutli", so this branch could never match.
        "multi": "Multi-Device",
    }
    name = display_names.get(globals.device)
    if name is not None:
        logger.info(f"Device: {name}")
42+
43+
2744
def enable(
2845
code,
2946
features,
@@ -81,6 +98,9 @@ def enable(
8198
logger.addHandler(fh)
8299
logger.addHandler(ch)
83100

101+
# device detection
102+
detect_device_(logger)
103+
84104
# print key inputs
85105
logger.info(f"Enabling started ...")
86106
logger.info(f"code: {code}")
@@ -467,6 +487,9 @@ def bench(
467487
logger.addHandler(ch)
468488
logger.addHandler(fh)
469489

490+
# device detection
491+
detect_device_(logger)
492+
470493
# print key inputs
471494
logger.info(f"Benchmarking started ...")
472495
logger.info(f"code: {code}")
@@ -661,7 +684,6 @@ def superbench(
661684
num_benchmark_iteration=5,
662685
iteration_dynamic_adjust=True,
663686
logging_level="info",
664-
cpu_conversion=True,
665687
cpu_set_env=True,
666688
ncore_per_instance=-1, # only for "self_defined" mode
667689
ninstances=-1, # only for "self_defined" mode
@@ -693,6 +715,9 @@ def superbench(
693715
logger.addHandler(ch)
694716
logger.addHandler(fh)
695717

718+
# device detection
719+
detect_device_(logger)
720+
696721
# print key inputs
697722
if auto_quant:
698723
logger.info(f"Auto-Quant started ...")
@@ -720,6 +745,10 @@ def superbench(
720745
f"You have to specify an entry_code of your code: [{code}]")
721746
quit()
722747

748+
# detect device compatibility of entry code
749+
from .utils.device import detect_code_device_compatibility
750+
detect_code_device_compatibility(entry_code)
751+
723752
if sweep_objective == "feature":
724753
list_FPS = []
725754
list_accuracy = []
@@ -803,7 +832,8 @@ def superbench(
803832
if "pytorch_inc_dynamic_quant" in features and "pytorch_mixed_precision_cpu" in features:
804833
continue
805834

806-
if cpu_conversion:
835+
# device conversion
836+
if "cpu" in globals.device and "cpu" not in globals.list_code_device_compatibility:
807837
features.append("pytorch_cuda_to_cpu")
808838

809839
if features[0] == "" and len(features) > 1:
@@ -1179,7 +1209,6 @@ def auto_quant(
11791209
num_benchmark_iteration=30,
11801210
iteration_dynamic_adjust=False,
11811211
logging_level="info",
1182-
cpu_conversion=True,
11831212
cpu_set_env=True,
11841213
ncore_per_instance=-1, # only for "self_defined" mode
11851214
ninstances=-1, # only for "self_defined" mode
@@ -1195,7 +1224,6 @@ def auto_quant(
11951224
num_benchmark_iteration=num_benchmark_iteration,
11961225
iteration_dynamic_adjust=iteration_dynamic_adjust,
11971226
logging_level=logging_level,
1198-
cpu_conversion=cpu_conversion,
11991227
cpu_set_env=cpu_set_env,
12001228
ncore_per_instance=ncore_per_instance, # only for "self_defined" mode
12011229
ninstances=ninstances, # only for "self_defined" mode

neural_coder/utils/device.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
# Copyright (c) 2022 Intel Corporation
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
import subprocess
17+
import torch
18+
19+
from .. import globals
20+
21+
22+
def detect_device():
    """Probe the machine for the best available device.

    The verdict is not returned; it is stored in ``globals.device`` as one of
    "cuda", "intel_gpu", "cpu_with_amx" or "cpu_without_amx".
    """
    # Prefer CUDA whenever PyTorch can see a usable NVIDIA GPU.
    if torch.cuda.is_available():
        detected = "cuda"
    # Otherwise look for an Intel GPU via the OpenCL device list.
    elif check_has('clinfo | grep "Intel(R) Graphics"'):
        detected = "intel_gpu"
    # Fall back to CPU, distinguishing AMX-capable parts via lscpu flags.
    elif check_has('lscpu | grep "amx"'):
        detected = "cpu_with_amx"
    else:
        detected = "cpu_without_amx"
    globals.device = detected
35+
36+
37+
def check_has(s):
    """Return True if shell command *s* prints anything to stdout.

    The command runs through the shell because callers pass pipelines
    (e.g. ``clinfo | grep ...``).  Any failure to execute is reported and
    treated as "not present" (best-effort probing).

    Args:
        s: shell command line to execute.

    Returns:
        bool: True when the command produced at least one byte of stdout.
    """
    try:
        # subprocess.run waits and captures stdout in one call; the original
        # Popen version rebound its own handle (`sp, _ = sp.communicate()`),
        # which obscured the logic.
        proc = subprocess.run(
            s,
            env=os.environ,
            shell=True,  # nosec: fixed command strings from this module only
            stdout=subprocess.PIPE,
        )  # nosec
        has = len(proc.stdout) > 0  # 0 bytes: no, >0 bytes: yes
    except Exception:  # was a bare `except:`; keep best-effort semantics
        has = False
        print('Checking failed.')
    return has
53+
54+
55+
def detect_code_device_compatibility(code_path):
    """Scan a Python file for hints of which devices it can run on.

    Looks for ``torch.cuda.is_available()`` checks and ``--device`` /
    ``args.cpu|cuda|gpu`` usages, and records the deduplicated findings in
    ``globals.list_code_device_compatibility``.

    Args:
        code_path: local path of the code to scan, or a github.com URL to a
            ``.py`` file (downloaded into the workspace first).
    """
    # handle github py url: convert a "blob" URL to its raw counterpart and
    # download it into the workspace.
    if "github.com" in code_path and ".py" in code_path:
        import requests
        code_path = code_path.replace("github.com", "raw.githubusercontent.com").replace("/blob", "")
        r = requests.get(code_path)
        save_py_path = "./neural_coder_workspace/model_analyze_device.py"
        # BUGFIX: file handle was never closed; use a context manager.
        with open(save_py_path, "wb") as f:
            f.write(r.content)
        code_path = save_py_path

    # BUGFIX: open(...).read() leaked the handle; use a context manager.
    with open(code_path, 'r') as src:
        lines = src.read().split('\n')

    for line in lines:
        # An explicit CUDA availability check implies a CPU fallback path.
        if "torch.cuda.is_available()" in line:
            globals.list_code_device_compatibility.append("cuda")
            globals.list_code_device_compatibility.append("cpu")
        if "--device" in line:
            if "cpu" in line:
                globals.list_code_device_compatibility.append("cpu")
            if "cuda" in line:
                globals.list_code_device_compatibility.append("cuda")
            if "gpu" in line:
                globals.list_code_device_compatibility.append("gpu")
            if "cpu" not in line and "gpu" not in line and "cuda" not in line:
                # A generic --device flag naming no device: assume all.
                globals.list_code_device_compatibility = ["cpu", "cuda", "gpu"]
        # NOTE(review): source formatting was lost in extraction; these
        # args.* checks are placed at loop level (an "args.cpu" line would
        # not normally also contain "--device") — confirm against upstream.
        if "args.cpu" in line:
            globals.list_code_device_compatibility.append("cpu")
        if "args.cuda" in line:
            globals.list_code_device_compatibility.append("cuda")
        if "args.gpu" in line:
            globals.list_code_device_compatibility.append("gpu")

    # Deduplicate the accumulated device tags.
    globals.list_code_device_compatibility = \
        list(set(globals.list_code_device_compatibility))

0 commit comments

Comments
 (0)