From 60eb031d398b0a10b3a89ad215e21814d14f2e9d Mon Sep 17 00:00:00 2001 From: NeosZhang Date: Wed, 17 Apr 2024 16:12:02 +0800 Subject: [PATCH 01/29] craft --- .../autogen_diopi_wrapper/autogen_diopi_wrapper.py | 8 +------- .../autogen_diopi_wrapper/autogen_wrapped_code.sh | 13 ++++++------- dipu/torch_dipu/csrc_dipu/CMakeLists.txt | 3 +-- dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.cpp | 11 ++++++----- 4 files changed, 14 insertions(+), 21 deletions(-) diff --git a/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py b/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py index 84c4aa2ac..067aaea55 100644 --- a/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py +++ b/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py @@ -906,7 +906,7 @@ def functions_code_gen(fun_config): fbody += custom_autograd_function_code fun_name = wrapper_fun_name - if fun_config.get("autocompare", False) in [True, "True"] and fun_config.get( + if fun_config.get("autocompare") not in ["disable"] and fun_config.get( "register_op", True ) in [True, "True"]: auto_compare_fun_name = fun_name + "_autocompare" @@ -1039,12 +1039,6 @@ def parse_args(): type=boolean_string, help="whether generate code that prints op args", ) - parser.add_argument( - "--autocompare", - default=False, - type=boolean_string, - help="whether generate code that compare device calculation results with cpu calculation results", - ) parser.add_argument( "--fun_config_dict", type=json.loads, diff --git a/dipu/scripts/autogen_diopi_wrapper/autogen_wrapped_code.sh b/dipu/scripts/autogen_diopi_wrapper/autogen_wrapped_code.sh index 3d6e0dd18..fd6e01b11 100755 --- a/dipu/scripts/autogen_diopi_wrapper/autogen_wrapped_code.sh +++ b/dipu/scripts/autogen_diopi_wrapper/autogen_wrapped_code.sh @@ -5,17 +5,16 @@ DIPU_DIR=$(readlink -f $(dirname $(readlink -f "$0"))/../..) 
AUTOGEN_DIOPI_WRAPPER=$DIPU_DIR/scripts/autogen_diopi_wrapper -USE_AUTOCOMPARE=${1:-OFF} -UsedVendor=${2:-cuda} -Torch_VERSION=${3:-2.1.0} -GENERATED_KERNELS_SCRIPT=${4:-$AUTOGEN_DIOPI_WRAPPER/autogen_diopi_wrapper.py} -GENERATED_KERNELS_CONFIG=${5:-$AUTOGEN_DIOPI_WRAPPER/diopi_functions.yaml} -GENERATED_KERNELS=${6:-$DIPU_DIR/torch_dipu/csrc_dipu/aten/ops/AutoGenedKernels.cpp} +UsedVendor=${1:-cuda} +Torch_VERSION=${2:-2.1.0} +GENERATED_KERNELS_SCRIPT=${3:-$AUTOGEN_DIOPI_WRAPPER/autogen_diopi_wrapper.py} +GENERATED_KERNELS_CONFIG=${4:-$AUTOGEN_DIOPI_WRAPPER/diopi_functions.yaml} +GENERATED_KERNELS=${5:-$DIPU_DIR/torch_dipu/csrc_dipu/aten/ops/AutoGenedKernels.cpp} GENERATED_KERNELS_VENDOR=${DIPU_DIR}/third_party/DIOPI/impl/${UsedVendor}/convert_config.yaml PYTHON_CMD="python3 ${GENERATED_KERNELS_SCRIPT} --out=${GENERATED_KERNELS} --config=${GENERATED_KERNELS_CONFIG} \ - --autocompare=${USE_AUTOCOMPARE} --print_op_arg=True --use_diopi_adapter=False --print_func_call_info=True \ + --print_op_arg=True --use_diopi_adapter=False --print_func_call_info=True \ --fun_config_dict='{\"current_device\":\"${UsedVendor}\",\"current_torch_ver\":\"${Torch_VERSION}\"}'" if [ -f "$GENERATED_KERNELS_VENDOR" ]; then diff --git a/dipu/torch_dipu/csrc_dipu/CMakeLists.txt b/dipu/torch_dipu/csrc_dipu/CMakeLists.txt index 20bb442fe..7b3ebaa18 100644 --- a/dipu/torch_dipu/csrc_dipu/CMakeLists.txt +++ b/dipu/torch_dipu/csrc_dipu/CMakeLists.txt @@ -1,5 +1,4 @@ #[[ Dependencies ]] -option(USE_AUTOCOMPARE "whether to use USE_AUTOCOMPARE" OFF) # Import Python3::Python, Python3_EXECUTABLE # Also see https://cmake.org/cmake/help/latest/module/FindPython3.html @@ -58,7 +57,7 @@ endif() add_custom_command( OUTPUT "${GENERATED_KERNELS}" - COMMAND bash -c "${AUTOGEN_CODE_SH} ${USE_AUTOCOMPARE} ${UsedVendor} ${Torch_VERSION} ${GENERATED_KERNELS_SCRIPT} ${GENERATED_KERNELS_CONFIG} ${GENERATED_KERNELS}" + COMMAND bash -c "${AUTOGEN_CODE_SH} ${UsedVendor} ${Torch_VERSION} ${GENERATED_KERNELS_SCRIPT} 
${GENERATED_KERNELS_CONFIG} ${GENERATED_KERNELS}" COMMENT "Generating ${GENERATED_KERNELS}$<$: with ${GENERATED_KERNELS_VENDOR}>" DEPENDS "${GENERATED_KERNELS_SCRIPT}" diff --git a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.cpp b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.cpp index 51d856a81..55ba5a39d 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.cpp +++ b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.cpp @@ -23,9 +23,8 @@ namespace dnative = dipu::native::dipu_aten; namespace dipu { namespace { -std::vector load_fallback_matcher() { - auto constexpr env_name = "DIPU_FORCE_FALLBACK_OPS_LIST"; - auto constexpr file_name = ".dipu_force_fallback_op_list.config"; +// load_matcher is used to get regex matcher from env_name and config +std::vector load_matcher(const char* env_name, const char* config_name) { auto append = [](std::istream& input, std::vector& output) { auto constexpr separator = ','; @@ -52,13 +51,15 @@ std::vector load_fallback_matcher() { auto iss = std::istringstream(env); append(iss, list); } - if (auto file = std::ifstream(file_name, std::ios::binary)) { + if (auto file = std::ifstream(config_name, std::ios::binary)) { append(file, list); } return list; } -auto const force_fallback_matchers = load_fallback_matcher(); +const char* fallback_env_name = "DIPU_FORCE_FALLBACK_OPS_LIST"; +const char* fallback_config_name = ".dipu_force_fallback_op_list.config"; +auto const force_fallback_matchers = load_matcher(fallback_env_name, fallback_config_name); } // end of namespace From 76d96f65700e039ee4eceebbbc487db2b8314194 Mon Sep 17 00:00:00 2001 From: NeosZhang Date: Thu, 18 Apr 2024 11:38:49 +0800 Subject: [PATCH 02/29] draft --- dipu/QuickStart.md | 4 +- .../autogen_diopi_wrapper.py | 18 +++- .../diopi_wrapper_template.py | 1 + dipu/scripts/ci/ascend/ci_ascend_script.sh | 8 +- dipu/torch_dipu/csrc_dipu/CMakeLists.txt | 1 + .../csrc_dipu/aten/RegisterDIPU.cpp | 52 ---------- .../csrc_dipu/aten/RegisterDIPU.hpp | 12 +-- 
.../csrc_dipu/aten/ops/OpRegexMatch.cpp | 99 +++++++++++++++++++ .../csrc_dipu/aten/ops/OpRegexMatch.hpp | 21 ++++ 9 files changed, 147 insertions(+), 69 deletions(-) create mode 100644 dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp create mode 100644 dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp diff --git a/dipu/QuickStart.md b/dipu/QuickStart.md index f2b91ec51..d6eb541b1 100644 --- a/dipu/QuickStart.md +++ b/dipu/QuickStart.md @@ -185,10 +185,10 @@ python -c "import torch_dipu" 由于该功能默认不开启,使用该功能时需要打开该功能并重新编译DIPU。 -可以通过设置环境变量USE_AUTOCOMPARE=ON,来开启该功能,然后需要重新编译DIPU。 +可以通过设置环境变量USE_GLOBAL_AUTOCOMPARE=ON,来开启该功能,然后需要重新编译DIPU。 ```shell -export USE_AUTOCOMPARE=ON +export USE_GLOBAL_AUTOCOMPARE=ON ``` 以上方法是对所有算子开启自动精度对比。如果只需要对特定算子做精度对比,也可只给需要的算子做精度对比,只需要在相关的配置文件(如 `dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml`)给相应的算子添加 `autocompare: True` 即可。 diff --git a/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py b/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py index 067aaea55..d5dd1aec7 100644 --- a/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py +++ b/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py @@ -943,9 +943,23 @@ def functions_code_gen(fun_config): fun_name = auto_compare_fun_name if fun_config.get("custom_fallback", False) in ["False", False]: + op_name = get_op_name_from_schema(fun_config["schema"]) + raw_fun_name = fun_name.replace("_autocompare", "") register_body = op_register_template.substitute( - register_name=[get_op_name_from_schema(fun_config["schema"])], - aten_fun_name=["dipu::native::" + fun_name], + register_name=[op_name], + aten_fun_name=[ + "dipu::whetherAutoCompare(" + +'"' + + op_name + +'"' + + ", autocompareMatchers" + + ") ? 
" + + "dipu::native::" + + fun_name + + " : " + + "dipu::native::" + + raw_fun_name + ], diopi_fun_name=[ get_fun_name_from_cppsignature(diopi_interface).replace( "diopi", "::diopi" diff --git a/dipu/scripts/autogen_diopi_wrapper/diopi_wrapper_template.py b/dipu/scripts/autogen_diopi_wrapper/diopi_wrapper_template.py index ba723da1b..c468d8589 100644 --- a/dipu/scripts/autogen_diopi_wrapper/diopi_wrapper_template.py +++ b/dipu/scripts/autogen_diopi_wrapper/diopi_wrapper_template.py @@ -50,6 +50,7 @@ #include "csrc_dipu/aten/ops/DIPUCopy.hpp" #include "csrc_dipu/aten/ops/NodispatchUtils.hpp" #include "csrc_dipu/aten/ops/OpUtils.hpp" +#include "csrc_dipu/aten/ops/OpRegexMatch.hpp" #include "csrc_dipu/base/basedef.h" #include "csrc_dipu/diopirt/diopirt_impl.h" #include "csrc_dipu/profiler/profiler.h" diff --git a/dipu/scripts/ci/ascend/ci_ascend_script.sh b/dipu/scripts/ci/ascend/ci_ascend_script.sh index 5c976271a..cdb3fc702 100644 --- a/dipu/scripts/ci/ascend/ci_ascend_script.sh +++ b/dipu/scripts/ci/ascend/ci_ascend_script.sh @@ -12,8 +12,8 @@ function build_diopi_lib() { function config_dipu_ascend_cmake() { mkdir -p build && cd ./build cmake_args="-DCMAKE_BUILD_TYPE=Release -DDEVICE=ascend -DWITH_DIOPI_LIBRARY=DISABLE" - if [ -n "$USE_AUTOCOMPARE" ]; then - cmake_args+=" -DUSE_AUTOCOMPARE=${USE_AUTOCOMPARE}" + if [ -n "$USE_GLOBAL_AUTOCOMPARE" ]; then + cmake_args+=" -DUSE_GLOBAL_AUTOCOMPARE=${USE_GLOBAL_AUTOCOMPARE}" fi cmake ../ $cmake_args cd ../ @@ -22,8 +22,8 @@ function config_dipu_ascend_cmake() { function config_all_ascend_cmake() { mkdir -p build && cd ./build cmake_args="-DCMAKE_BUILD_TYPE=Release -DDEVICE=ascend -DENABLE_COVERAGE=${USE_COVERAGE} -DWITH_DIOPI=INTERNAL" - if [ -n "$USE_AUTOCOMPARE" ]; then - cmake_args+=" -DUSE_AUTOCOMPARE=${USE_AUTOCOMPARE}" + if [ -n "$USE_GLOBAL_AUTOCOMPARE" ]; then + cmake_args+=" -DUSE_GLOBAL_AUTOCOMPARE=${USE_GLOBAL_AUTOCOMPARE}" fi cmake ../ $cmake_args cd ../ diff --git a/dipu/torch_dipu/csrc_dipu/CMakeLists.txt 
b/dipu/torch_dipu/csrc_dipu/CMakeLists.txt index 7b3ebaa18..457d706f4 100644 --- a/dipu/torch_dipu/csrc_dipu/CMakeLists.txt +++ b/dipu/torch_dipu/csrc_dipu/CMakeLists.txt @@ -75,6 +75,7 @@ set(TORCH_DIPU_SOURCE aten/ops/PinMemoryKernel.cpp aten/ops/EmptyOpsKernel.cpp aten/ops/CustomFallbackFunctionsForCopy.cpp + aten/ops/OpRegexMatch.cpp aten/RegisterDIPU.cpp aten/CPUFallback.cpp diff --git a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.cpp b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.cpp index 55ba5a39d..c99cac94e 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.cpp +++ b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.cpp @@ -21,58 +21,6 @@ namespace dnative = dipu::native::dipu_aten; namespace dipu { -namespace { - -// load_matcher is used to get regex matcher from env_name and config -std::vector load_matcher(const char* env_name, const char* config_name) { - - auto append = [](std::istream& input, std::vector& output) { - auto constexpr separator = ','; - - auto line = std::string(); - while (std::getline(input, line)) { - auto buffer = std::istringstream(line); - auto pattern = std::string(); - while (std::getline(buffer, pattern, separator)) { - if (pattern.empty()) { - continue; - } - try { - output.emplace_back(pattern); - } catch (const std::regex_error& e) { - TORCH_CHECK(false, e.what()); - } - } - } - }; - - auto list = std::vector(); - if (auto env = std::getenv(env_name)) { - auto iss = std::istringstream(env); - append(iss, list); - } - if (auto file = std::ifstream(config_name, std::ios::binary)) { - append(file, list); - } - return list; -} - -const char* fallback_env_name = "DIPU_FORCE_FALLBACK_OPS_LIST"; -const char* fallback_config_name = ".dipu_force_fallback_op_list.config"; -auto const force_fallback_matchers = load_matcher(fallback_env_name, fallback_config_name); - -} // end of namespace - -bool get_force_fallback(const char* opname) { - if (force_fallback_matchers.empty() || opname == nullptr) { - return false; - } - - return 
std::any_of( - force_fallback_matchers.begin(), force_fallback_matchers.end(), - [&opname](auto& matcher) { return std::regex_match(opname, matcher); }); -} - namespace native { void cpu_fallback(const c10::OperatorHandle& op, torch::jit::Stack* stack); } // end of namespace native diff --git a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp index 82f36671b..4ad3c7755 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp +++ b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp @@ -9,15 +9,9 @@ #include #include "csrc_dipu/aten/ops/OpUtils.hpp" - -namespace dipu { - -bool get_force_fallback(const char* opname); - -}; // namespace dipu +#include "csrc_dipu/aten/ops/OpRegexMatch.hpp" namespace at { - void dipu_fallback(const c10::OperatorHandle& op, DispatchKeySet dispatch_keys, torch::jit::Stack* stack); @@ -55,7 +49,7 @@ void dipu_fallback(const c10::OperatorHandle& op, DispatchKeySet dispatch_keys, #define DIOPI_ATEN_FUNC(opname, diopiFunc, wapperFunc) \ do { \ if ((reinterpret_cast(diopiFunc) != nullptr) && \ - (!dipu::get_force_fallback(opname))) { \ + (!dipu::whetherOpMatch(opname, fallbackMatchers))) { \ m.impl(opname, TORCH_FN(wapperFunc)); \ } else { \ if ((reinterpret_cast(diopiFunc) == nullptr)) { \ @@ -77,7 +71,7 @@ void dipu_fallback(const c10::OperatorHandle& op, DispatchKeySet dispatch_keys, break; \ } \ if ((reinterpret_cast(diopi_func) != nullptr) && \ - !((force_fallback) || dipu::get_force_fallback(opname))) { \ + !((force_fallback) || dipu::whetherOpMatch(opname, fallbackMatchers))) { \ m.impl(opname, TORCH_FN(wapper_func)); \ } else { \ if ((reinterpret_cast(diopi_func) == nullptr)) { \ diff --git a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp new file mode 100644 index 000000000..613a856ef --- /dev/null +++ b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp @@ -0,0 +1,99 @@ +#include +#include +#include +#include +#include + 
+#include + +#include "OpRegexMatch.hpp" + +// loadMatcher is used to get regex matcher from env_name and config +// fallback_env_name = "DIPU_FORCE_FALLBACK_OPS_LIST"; fallback_config_name = ".dipu_force_fallback_op_list.config" +// specified_autocompare_env_name = "SPECIFIED_AUTOCOMPARE_OPS_LIST"; specified_autocompare_config_name = ".specified_autocompare_op_list.config" + +namespace dipu { +std::vector loadMatcher(const char* env_name, const char* config_name) { + auto append = [](std::istream& input, std::vector& output) { + auto constexpr separator = ','; + + auto line = std::string(); + while (std::getline(input, line)) { + auto buffer = std::istringstream(line); + auto pattern = std::string(); + while (std::getline(buffer, pattern, separator)) { + if (pattern.empty()) { + continue; + } + try { + output.emplace_back(pattern); + } catch (const std::regex_error& e) { + TORCH_CHECK(false, e.what()); + } + } + } + }; + + auto list = std::vector(); + if (auto env = std::getenv(env_name)) { + auto iss = std::istringstream(env); + append(iss, list); + } + if (auto file = std::ifstream(config_name, std::ios::binary)) { + append(file, list); + } + return list; +} + +bool whetherOpMatch(const char* opname, std::vector regexMatchers) { + if (regexMatchers.empty() || opname == nullptr) { + return false; + } + + return std::any_of( + regexMatchers.begin(), regexMatchers.end(), + [&opname](auto& matcher) { return std::regex_match(opname, matcher); }); +} + +bool whetherGlobalAutocompare() { + static const char* globalAutocompare = std::getenv("USE_GLOBAL_AUTOCOMPARE"); + if (globalAutocompare == nullptr) { + return false; + } + + std::string globalAutocompareStr(globalAutocompare); + for (char& c : globalAutocompareStr) { + c = static_cast(std::tolower(static_cast(c))); + } + + if (globalAutocompareStr == "on") { + return true; + } + if (globalAutocompareStr == "off") { + return false; + } + + std::cerr << "Error: USE_GLOBAL_AUTOCOMPARE can only be set to 'ON' or 
'OFF'.\n"; + return false; +} + +// Whether to enable AutoCompare is based on USE_GLOBAL_AUTOCOMPARE and SPECIFIED_AUTOCOMPARE_OPS_LIST +bool whetherAutoCompare(const char* opname, std::vector autocompareMatchers) { + // if USE_GLOBAL_AUTOCOMPARE is true, global autocompare is enabled regardless the value of SPECIFIED_AUTOCOMPARE_OPS_LIST + if (whetherGlobalAutocompare()) { + return true; + } + // else if opname in SPECIFIED_AUTOCOMPARE_OPS_LIST, the specified op will be autocomapred + // return whetherOpMatch(opname, autocompareMatchers); + return false; + +} +} // end of namespace dipu + +const char* fallback_env_name = "DIPU_FORCE_FALLBACK_OPS_LIST"; +const char* fallback_config_name = ".dipu_force_fallback_op_list.config"; +std::vector fallbackMatchers = dipu::loadMatcher(fallback_env_name, fallback_config_name); + +const char* specified_autocompare_env_name = "SPECIFIED_AUTOCOMPARE_OPS_LIST"; +const char* specified_autocompare_config_name = ".specified_autocompare_op_list.config"; +std::vector autocompareMatchers = dipu::loadMatcher(specified_autocompare_env_name, specified_autocompare_config_name); diff --git a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp new file mode 100644 index 000000000..852db31c0 --- /dev/null +++ b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp @@ -0,0 +1,21 @@ +#include +#include +#include +#include + +#include + +namespace dipu{ +std::vector loadMatcher(const char* env_name, const char* config_name); +const bool whetherOpMatch(const char* opname, std::vector regexMatchers); +bool whetherGlobalAutocompare(); +bool whetherAutoCompare(const char* opname, std::vector autocompareMatchers); +} + +extern const char* fallback_env_name; +extern const char* fallback_config_name; +extern std::vector fallbackMatchers; + +extern const char* specified_autocompare_env_name; +extern const char* specified_autocompare_config_name; +extern std::vector autocompareMatchers; From 
63a4ff7a5c414d9eabcae68b17b85bdb9a3c0004 Mon Sep 17 00:00:00 2001 From: NeosZhang Date: Thu, 18 Apr 2024 14:07:06 +0800 Subject: [PATCH 03/29] fix --- dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp index 852db31c0..b6aacf361 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp +++ b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp @@ -7,7 +7,7 @@ namespace dipu{ std::vector loadMatcher(const char* env_name, const char* config_name); -const bool whetherOpMatch(const char* opname, std::vector regexMatchers); +bool whetherOpMatch(const char* opname, std::vector regexMatchers); bool whetherGlobalAutocompare(); bool whetherAutoCompare(const char* opname, std::vector autocompareMatchers); } From 538cf2fb9a0e36f5345d9dccb95c1088fc61d57a Mon Sep 17 00:00:00 2001 From: NeosZhang Date: Thu, 18 Apr 2024 21:41:51 +0800 Subject: [PATCH 04/29] fix --- .../autogen_diopi_wrapper.py | 43 +++++++++++-------- .../diopi_wrapper_template.py | 4 ++ .../csrc_dipu/aten/RegisterDIPU.hpp | 30 ++++++++++--- 3 files changed, 55 insertions(+), 22 deletions(-) diff --git a/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py b/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py index d5dd1aec7..ae1cf32cb 100644 --- a/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py +++ b/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py @@ -9,6 +9,7 @@ diopi_wrapper_file_template_content, diopi_wrapper_function_template_content, op_register_template_content, + op_register_disable_autocompare_template_content, custom_autograd_template_content, autocompare_template_content, op_with_custom_fallback_register_template_content, @@ -673,6 +674,10 @@ def create_optional_generator_process_code(arg_name): op_register_template = CodeTemplate(op_register_template_content) 
+op_disable_autocompare_register_template = CodeTemplate( + op_register_disable_autocompare_template_content +) + op_with_custom_fallback_register_template = CodeTemplate( op_with_custom_fallback_register_template_content ) @@ -940,32 +945,35 @@ def functions_code_gen(fun_config): ], ) fbody += autocompare_code - fun_name = auto_compare_fun_name - if fun_config.get("custom_fallback", False) in ["False", False]: - op_name = get_op_name_from_schema(fun_config["schema"]) - raw_fun_name = fun_name.replace("_autocompare", "") + # generate the OP_register code + if fun_config.get("custom_fallback", False) in ["False", False] and fun_config.get( + "autocompare", True + ) in ["True", True]: register_body = op_register_template.substitute( - register_name=[op_name], - aten_fun_name=[ - "dipu::whetherAutoCompare(" - +'"' - + op_name - +'"' - + ", autocompareMatchers" - + ") ? " - + "dipu::native::" - + fun_name - + " : " - + "dipu::native::" - + raw_fun_name + register_name=[get_op_name_from_schema(fun_config["schema"])], + aten_fun_name=["dipu::native::" + fun_name], + diopi_fun_name=[ + get_fun_name_from_cppsignature(diopi_interface).replace( + "diopi", "::diopi" + ) ], + ) + + elif fun_config.get("custom_fallback", False) in [ + "False", + False, + ] and fun_config.get("autocompare") in ["disable"]: + register_body = op_disable_autocompare_register_template.substitute( + register_name=[get_op_name_from_schema(fun_config["schema"])], + aten_fun_name=["dipu::native::" + fun_name], diopi_fun_name=[ get_fun_name_from_cppsignature(diopi_interface).replace( "diopi", "::diopi" ) ], ) + else: register_body = op_with_custom_fallback_register_template.substitute( register_name=[get_op_name_from_schema(fun_config["schema"])], @@ -988,6 +996,7 @@ def functions_code_gen(fun_config): + fun_name.replace("_autocompare", "") ], ) + return fbody, register_body diff --git a/dipu/scripts/autogen_diopi_wrapper/diopi_wrapper_template.py 
b/dipu/scripts/autogen_diopi_wrapper/diopi_wrapper_template.py index c468d8589..070c851f0 100644 --- a/dipu/scripts/autogen_diopi_wrapper/diopi_wrapper_template.py +++ b/dipu/scripts/autogen_diopi_wrapper/diopi_wrapper_template.py @@ -132,6 +132,10 @@ DIOPI_ATEN_FUNC("$register_name", $diopi_fun_name, $aten_fun_name); """ +op_register_disable_autocompare_template_content = """ +DIOPI_ATEN_FUNC_DISABLE_AUTOCOMPARE("$register_name", $diopi_fun_name, $aten_fun_name); +""" + op_with_custom_fallback_register_template_content = """ DIOPI_ATEN_FUNC_CUSTOM_FALLBACK("$register_name", $diopi_fun_name, $force_fallback /*whether force fallback*/, $aten_fun_name, $fallbackFunc); """ diff --git a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp index 4ad3c7755..4ad486ea9 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp +++ b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp @@ -46,11 +46,31 @@ void dipu_fallback(const c10::OperatorHandle& op, DispatchKeySet dispatch_keys, // It mat be necessary to determine whether to keep torchop default impl // for non-custom ops through function dipuKeepTorchopDefaultImpl firstly in the // future, and we use force fallback to keep torchop default impl now. 
-#define DIOPI_ATEN_FUNC(opname, diopiFunc, wapperFunc) \ +#define addAutoCompare(wrapperFunc) wrapperFunc##_autocompare +#define DIOPI_ATEN_FUNC(opname, diopiFunc, wrapperFunc) \ do { \ if ((reinterpret_cast(diopiFunc) != nullptr) && \ - (!dipu::whetherOpMatch(opname, fallbackMatchers))) { \ - m.impl(opname, TORCH_FN(wapperFunc)); \ + (!dipu::whetherOpMatch(opname, fallbackMatchers))) { \ + if (dipu::whetherAutoCompare(opname, autocompareMatchers)) { \ + m.impl(opname, TORCH_FN(addAutoCompare(wrapperFunc))); \ + } \ + m.impl(opname, TORCH_FN(wrapperFunc)); \ + } else { \ + if ((reinterpret_cast(diopiFunc) == nullptr)) { \ + DIPU_OP_LOG_WARNING_ONCE(#diopiFunc << " is not yet implemented, "); \ + } else { \ + DIPU_OP_LOG_WARNING_ONCE("force fallback has been set, "); \ + } \ + DIPU_OP_LOG_WARNING_ONCE((opname) << " will be fallback to cpu" \ + << "\n"); \ + } \ + } while (false); + +#define DIOPI_ATEN_FUNC_DISABLE_AUTOCOMPARE(opname, diopiFunc, wrapperFunc) \ + do { \ + if ((reinterpret_cast(diopiFunc) != nullptr) && \ + (!dipu::whetherOpMatch(opname, fallbackMatchers))) { \ + m.impl(opname, TORCH_FN(wrapperFunc)); \ } else { \ if ((reinterpret_cast(diopiFunc) == nullptr)) { \ DIPU_OP_LOG_WARNING_ONCE(#diopiFunc << " is not yet implemented, "); \ @@ -65,14 +85,14 @@ void dipu_fallback(const c10::OperatorHandle& op, DispatchKeySet dispatch_keys, // Determine whether to keep torchop default impl for custom ops through // function dipuKeepTorchopDefaultImpl firstly. 
#define DIOPI_ATEN_FUNC_CUSTOM_FALLBACK(opname, diopi_func, force_fallback, \ - wapper_func, custom_fallback_func) \ + wrapper_func, custom_fallback_func) \ do { \ if (dipu::native::dipuKeepTorchopDefaultImpl(opname)) { \ break; \ } \ if ((reinterpret_cast(diopi_func) != nullptr) && \ !((force_fallback) || dipu::whetherOpMatch(opname, fallbackMatchers))) { \ - m.impl(opname, TORCH_FN(wapper_func)); \ + m.impl(opname, TORCH_FN(wrapper_func)); \ } else { \ if ((reinterpret_cast(diopi_func) == nullptr)) { \ DIPU_OP_LOG_WARNING_ONCE(#diopi_func << " is not yet implemented, "); \ From 5348043e7ddb9cdf464f99066a3126a8685b4fe6 Mon Sep 17 00:00:00 2001 From: NeosZhang Date: Fri, 19 Apr 2024 13:35:09 +0800 Subject: [PATCH 05/29] update readme --- dipu/QuickStart.md | 47 +++++++++++-------- .../csrc_dipu/aten/RegisterDIPU.hpp | 15 +++--- .../csrc_dipu/aten/ops/OpRegexMatch.cpp | 6 +-- 3 files changed, 37 insertions(+), 31 deletions(-) diff --git a/dipu/QuickStart.md b/dipu/QuickStart.md index d6eb541b1..a6d9e5a89 100644 --- a/dipu/QuickStart.md +++ b/dipu/QuickStart.md @@ -158,9 +158,9 @@ sh ./tests/python/run_tests.sh ### 算子库拓展功能 -#### 算子 Fallback - -Fallback 给定算子: +#### 算子Fallback功能 +Fallback指的是使用算子的CPU实现,而非设备实现。 +Fallback给定算子: ```bash export DIPU_FORCE_FALLBACK_OPS_LIST=add.out,conv2d @@ -181,20 +181,10 @@ export DIPU_FORCE_FALLBACK_OPS_LIST='.*' python -c "import torch_dipu" ``` -#### 算子精度自动对比功能介绍 - -由于该功能默认不开启,使用该功能时需要打开该功能并重新编译DIPU。 - -可以通过设置环境变量USE_GLOBAL_AUTOCOMPARE=ON,来开启该功能,然后需要重新编译DIPU。 - -```shell -export USE_GLOBAL_AUTOCOMPARE=ON -``` - -以上方法是对所有算子开启自动精度对比。如果只需要对特定算子做精度对比,也可只给需要的算子做精度对比,只需要在相关的配置文件(如 `dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml`)给相应的算子添加 `autocompare: True` 即可。 - +#### 算子精度自动对比功能 +算子精度自动对比功能用于确保算子计算结果的正确性,通过将设备参数拷贝到CPU上,对比CPU和设备的计算结果来判断精度是否达标。以下是算子精度自动对比功能的使用例子: ```shell -$ unset DIPU_FORCE_FALLBACK_OPS_LIST # 主要是确保要比较的算子没有强制 fallback 到 cpu, 可选 +$ unset DIPU_FORCE_FALLBACK_OPS_LIST # 主要是确保要比较的算子没有强制 fallback到CPU, 可选 $ python >>> 
import torch >>> import torch_dipu @@ -220,11 +210,28 @@ autocompare: add.out other: allclose >>> ``` -可以看到,CPU 计算结果与设备计算结果 `allclose`,也能看到 CPU 和设备计算结果的 `shape`、`dtype` 等信息。特别的,需要注意以下几个问题: +可以看到,输出包括 CPU 和设备计算结果的 `shape`、`stride`、`dtype` 等信息, 最终结果是CPU和设备的self和out都是allclose的。 + +##### 算子精度自动对比功能的设置 +算子精度自动对比功能默认不开启,可以设置环境变量`USE_GLOBAL_AUTOCOMPARE`和`SPECIFIED_AUTOCOMPARE_OPS_LIST`来控制该功能,在开启算子自动对比功能前,必须unset `DIPU_FORCE_FALLBACK_OPS_LIST` +- 可以通过设置环境变量`USE_GLOBAL_AUTOCOMPARE=ON`,开启全局的精度对比,这种情况下所有调用的算子都会进行精度对比,也可以设置为OFF来关闭所有算子的精度自动对比功能 +```shell +# 开启全局的算子精度自动对比功能 +export USE_GLOBAL_AUTOCOMPARE=ON +``` + +- 在未开启`USE_GLOBAL_AUTOCOMPARE`的前提下,可以设置`SPECIFIED_AUTOCOMPARE_OPS_LIST`来指定算子开启自动精度对比,支持正则表达式匹配。算子名可以参考[diopi_functions.yaml](https://github.com/DeepLink-org/deeplink.framework/blob/main/dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml)。 + +```shell +# 关闭全局的算子精度自动对比功能,并指定add*算子进行对比 +export USE_GLOBAL_AUTOCOMPARE=OFF +export SPECIFIED_AUTOCOMPARE_OPS_LIST=add* +``` -1. `dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml` 中配置了 `autograd:True` 的算子 (`cross_entropy_loss`、`conv2d`、`dropout`、`dropout_`、`linear`) 暂不支持 *backward* 的精度自动对比。如模型精度对不齐,可根据需要先将这几个算子 fallback 到 CPU 来确定问题。 -2. 随机数生成相关的算子(`dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml` 中配置了 `autocompare:False`)没有做 `autocompare`,因为结果总是 `not_allclose`。 -3. 对输入做检查是确保算子输入不被意外修改。 +NOTE: +1. 部分算子并不支持自动精度对比功能,可以查看[diopi_functions.yaml](https://github.com/DeepLink-org/deeplink.framework/blob/main/dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml),其中的`autocompare`配置项为`disable`即不支持自动精度对比功能,同时也可以修改`diopi_functions.yaml`,将某些算子的`autocompare`配置项设置为`disable`来禁用自动对比功能。 +2. `dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml` 中配置了 `autograd:True` 的算子 (`cross_entropy_loss`、`conv2d`、`dropout`、`dropout_`、`linear`) 暂不支持 *backward* 的精度自动对比。如模型精度对不齐,可根据需要先将这几个算子 fallback 到 CPU 来确定问题。 +3. 
对输入参数(self)做检查是确保算子的输入不被意外修改。 #### 抓取算子参数 diff --git a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp index 4ad486ea9..62d82b506 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp +++ b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp @@ -47,14 +47,15 @@ void dipu_fallback(const c10::OperatorHandle& op, DispatchKeySet dispatch_keys, // for non-custom ops through function dipuKeepTorchopDefaultImpl firstly in the // future, and we use force fallback to keep torchop default impl now. #define addAutoCompare(wrapperFunc) wrapperFunc##_autocompare -#define DIOPI_ATEN_FUNC(opname, diopiFunc, wrapperFunc) \ +#define DIOPI_ATEN_FUNC(opname, diopiFunc, wrapperFunc) \ do { \ if ((reinterpret_cast(diopiFunc) != nullptr) && \ (!dipu::whetherOpMatch(opname, fallbackMatchers))) { \ - if (dipu::whetherAutoCompare(opname, autocompareMatchers)) { \ - m.impl(opname, TORCH_FN(addAutoCompare(wrapperFunc))); \ + if (dipu::whetherAutoCompare(opname, autocompareMatchers)) { \ + m.impl(opname, TORCH_FN(addAutoCompare(wrapperFunc))); \ + } else { \ + m.impl(opname, TORCH_FN(wrapperFunc)); \ } \ - m.impl(opname, TORCH_FN(wrapperFunc)); \ } else { \ if ((reinterpret_cast(diopiFunc) == nullptr)) { \ DIPU_OP_LOG_WARNING_ONCE(#diopiFunc << " is not yet implemented, "); \ @@ -66,11 +67,11 @@ void dipu_fallback(const c10::OperatorHandle& op, DispatchKeySet dispatch_keys, } \ } while (false); -#define DIOPI_ATEN_FUNC_DISABLE_AUTOCOMPARE(opname, diopiFunc, wrapperFunc) \ +#define DIOPI_ATEN_FUNC_DISABLE_AUTOCOMPARE(opname, diopiFunc, wrapperFunc) \ do { \ if ((reinterpret_cast(diopiFunc) != nullptr) && \ - (!dipu::whetherOpMatch(opname, fallbackMatchers))) { \ - m.impl(opname, TORCH_FN(wrapperFunc)); \ + (!dipu::whetherOpMatch(opname, fallbackMatchers))) { \ + m.impl(opname, TORCH_FN(wrapperFunc)); \ } else { \ if ((reinterpret_cast(diopiFunc) == nullptr)) { \ DIPU_OP_LOG_WARNING_ONCE(#diopiFunc << " is not yet implemented, "); \ diff 
--git a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp index 613a856ef..17f28ce42 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp +++ b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp @@ -84,10 +84,8 @@ bool whetherAutoCompare(const char* opname, std::vector autocompareM return true; } // else if opname in SPECIFIED_AUTOCOMPARE_OPS_LIST, the specified op will be autocomapred - // return whetherOpMatch(opname, autocompareMatchers); - return false; - -} + return whetherOpMatch(opname, autocompareMatchers); + } } // end of namespace dipu const char* fallback_env_name = "DIPU_FORCE_FALLBACK_OPS_LIST"; From 6d8a8e3bcaf88ad10acc8639372cc237d97e031b Mon Sep 17 00:00:00 2001 From: NeosZhang Date: Fri, 19 Apr 2024 14:03:09 +0800 Subject: [PATCH 06/29] fix md lint --- dipu/QuickStart.md | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/dipu/QuickStart.md b/dipu/QuickStart.md index a6d9e5a89..49fc4a9eb 100644 --- a/dipu/QuickStart.md +++ b/dipu/QuickStart.md @@ -158,7 +158,8 @@ sh ./tests/python/run_tests.sh ### 算子库拓展功能 -#### 算子Fallback功能 +#### 算子Fallback功能 + Fallback指的是使用算子的CPU实现,而非设备实现。 Fallback给定算子: @@ -182,7 +183,9 @@ python -c "import torch_dipu" ``` #### 算子精度自动对比功能 + 算子精度自动对比功能用于确保算子计算结果的正确性,通过将设备参数拷贝到CPU上,对比CPU和设备的计算结果来判断精度是否达标。以下是算子精度自动对比功能的使用例子: + ```shell $ unset DIPU_FORCE_FALLBACK_OPS_LIST # 主要是确保要比较的算子没有强制 fallback到CPU, 可选 $ python @@ -213,8 +216,11 @@ autocompare: add.out other: allclose 可以看到,输出包括 CPU 和设备计算结果的 `shape`、`stride`、`dtype` 等信息, 最终结果是CPU和设备的self和out都是allclose的。 ##### 算子精度自动对比功能的设置 + 算子精度自动对比功能默认不开启,可以设置环境变量`USE_GLOBAL_AUTOCOMPARE`和`SPECIFIED_AUTOCOMPARE_OPS_LIST`来控制该功能,在开启算子自动对比功能前,必须unset `DIPU_FORCE_FALLBACK_OPS_LIST` + - 可以通过设置环境变量`USE_GLOBAL_AUTOCOMPARE=ON`,开启全局的精度对比,这种情况下所有调用的算子都会进行精度对比,也可以设置为OFF来关闭所有算子的精度自动对比功能 + ```shell # 开启全局的算子精度自动对比功能 export USE_GLOBAL_AUTOCOMPARE=ON @@ -228,7 +234,8 @@ export USE_GLOBAL_AUTOCOMPARE=OFF export 
SPECIFIED_AUTOCOMPARE_OPS_LIST=add* ``` -NOTE: +NOTE: + 1. 部分算子并不支持自动精度对比功能,可以查看[diopi_functions.yaml](https://github.com/DeepLink-org/deeplink.framework/blob/main/dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml),其中的`autocompare`配置项为`disable`即不支持自动精度对比功能,同时也可以修改`diopi_functions.yaml`,将某些算子的`autocompare`配置项设置为`disable`来禁用自动对比功能。 2. `dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml` 中配置了 `autograd:True` 的算子 (`cross_entropy_loss`、`conv2d`、`dropout`、`dropout_`、`linear`) 暂不支持 *backward* 的精度自动对比。如模型精度对不齐,可根据需要先将这几个算子 fallback 到 CPU 来确定问题。 3. 对输入参数(self)做检查是确保算子的输入不被意外修改。 From 47903157721912cab62f5bc1dc13c38fdd5f67c3 Mon Sep 17 00:00:00 2001 From: NeosZhang Date: Fri, 19 Apr 2024 14:33:13 +0800 Subject: [PATCH 07/29] fix cpp lint --- .../csrc_dipu/aten/RegisterDIPU.hpp | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp index 62d82b506..27dbe7e9f 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp +++ b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp @@ -1,15 +1,15 @@ // Copyright (c) 2023, DeepLink. 
#pragma once +#include + #include #include #include #include -#include - -#include "csrc_dipu/aten/ops/OpUtils.hpp" #include "csrc_dipu/aten/ops/OpRegexMatch.hpp" +#include "csrc_dipu/aten/ops/OpUtils.hpp" namespace at { void dipu_fallback(const c10::OperatorHandle& op, DispatchKeySet dispatch_keys, @@ -51,7 +51,7 @@ void dipu_fallback(const c10::OperatorHandle& op, DispatchKeySet dispatch_keys, do { \ if ((reinterpret_cast(diopiFunc) != nullptr) && \ (!dipu::whetherOpMatch(opname, fallbackMatchers))) { \ - if (dipu::whetherAutoCompare(opname, autocompareMatchers)) { \ + if (dipu::whetherAutoCompare(opname, autocompareMatchers)) { \ m.impl(opname, TORCH_FN(addAutoCompare(wrapperFunc))); \ } else { \ m.impl(opname, TORCH_FN(wrapperFunc)); \ @@ -62,8 +62,8 @@ void dipu_fallback(const c10::OperatorHandle& op, DispatchKeySet dispatch_keys, } else { \ DIPU_OP_LOG_WARNING_ONCE("force fallback has been set, "); \ } \ - DIPU_OP_LOG_WARNING_ONCE((opname) << " will be fallback to cpu" \ - << "\n"); \ + DIPU_OP_LOG_WARNING_ONCE((opname) \ + << " will be fallback to cpu" << "\n"); \ } \ } while (false); @@ -78,30 +78,31 @@ void dipu_fallback(const c10::OperatorHandle& op, DispatchKeySet dispatch_keys, } else { \ DIPU_OP_LOG_WARNING_ONCE("force fallback has been set, "); \ } \ - DIPU_OP_LOG_WARNING_ONCE((opname) << " will be fallback to cpu" \ - << "\n"); \ + DIPU_OP_LOG_WARNING_ONCE((opname) \ + << " will be fallback to cpu" << "\n"); \ } \ } while (false); // Determine whether to keep torchop default impl for custom ops through // function dipuKeepTorchopDefaultImpl firstly. 
#define DIOPI_ATEN_FUNC_CUSTOM_FALLBACK(opname, diopi_func, force_fallback, \ - wrapper_func, custom_fallback_func) \ + wrapper_func, custom_fallback_func) \ do { \ if (dipu::native::dipuKeepTorchopDefaultImpl(opname)) { \ break; \ } \ if ((reinterpret_cast(diopi_func) != nullptr) && \ - !((force_fallback) || dipu::whetherOpMatch(opname, fallbackMatchers))) { \ - m.impl(opname, TORCH_FN(wrapper_func)); \ + !((force_fallback) || \ + dipu::whetherOpMatch(opname, fallbackMatchers))) { \ + m.impl(opname, TORCH_FN(wrapper_func)); \ } else { \ if ((reinterpret_cast(diopi_func) == nullptr)) { \ DIPU_OP_LOG_WARNING_ONCE(#diopi_func << " is not yet implemented, "); \ } else { \ DIPU_OP_LOG_WARNING_ONCE("force fallback has been set, "); \ } \ - DIPU_OP_LOG_WARNING_ONCE((opname) << " will be fallback to cpu" \ - << "\n"); \ + DIPU_OP_LOG_WARNING_ONCE((opname) \ + << " will be fallback to cpu" << "\n"); \ m.impl(opname, TORCH_FN(custom_fallback_func)); \ } \ } while (false); From ba73de3dd2abde5641abc13588aea36ecb458341 Mon Sep 17 00:00:00 2001 From: NeosZhang Date: Fri, 19 Apr 2024 15:04:07 +0800 Subject: [PATCH 08/29] fix readme --- dipu/QuickStart.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dipu/QuickStart.md b/dipu/QuickStart.md index 49fc4a9eb..40f197ee8 100644 --- a/dipu/QuickStart.md +++ b/dipu/QuickStart.md @@ -229,9 +229,9 @@ export USE_GLOBAL_AUTOCOMPARE=ON - 在未开启`USE_GLOBAL_AUTOCOMPARE`的前提下,可以设置`SPECIFIED_AUTOCOMPARE_OPS_LIST`来指定算子开启自动精度对比,支持正则表达式匹配。算子名可以参考[diopi_functions.yaml](https://github.com/DeepLink-org/deeplink.framework/blob/main/dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml)。 ```shell -# 关闭全局的算子精度自动对比功能,并指定add*算子进行对比 +# 关闭全局的算子精度自动对比功能,并指定add.*?算子进行对比 export USE_GLOBAL_AUTOCOMPARE=OFF -export SPECIFIED_AUTOCOMPARE_OPS_LIST=add* +export SPECIFIED_AUTOCOMPARE_OPS_LIST=add.*? 
``` NOTE: From e97db80eb27aa16b2ccd3ae90a5aaba578a49666 Mon Sep 17 00:00:00 2001 From: NeosZhang Date: Fri, 19 Apr 2024 15:08:43 +0800 Subject: [PATCH 09/29] fix lint --- .../csrc_dipu/aten/RegisterDIPU.cpp | 13 +++-- .../csrc_dipu/aten/ops/OpRegexMatch.cpp | 51 +++++++++++-------- .../csrc_dipu/aten/ops/OpRegexMatch.hpp | 20 ++++---- 3 files changed, 48 insertions(+), 36 deletions(-) diff --git a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.cpp b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.cpp index c99cac94e..639965e1f 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.cpp +++ b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.cpp @@ -1,11 +1,4 @@ // Copyright (c) 2023, DeepLink. -#include "RegisterDIPU.hpp" - -#include -#include -#include -#include - #include #include #include @@ -13,6 +6,12 @@ #include #include +#include +#include +#include +#include + +#include "RegisterDIPU.hpp" #include "csrc_dipu/aten/DIPUATenFunctions.h" #include "csrc_dipu/base/basedef.h" #include "csrc_dipu/profiler/profiler.h" diff --git a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp index 17f28ce42..ca490a1df 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp +++ b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp @@ -1,19 +1,22 @@ +#include + #include +#include #include #include -#include #include -#include - #include "OpRegexMatch.hpp" // loadMatcher is used to get regex matcher from env_name and config -// fallback_env_name = "DIPU_FORCE_FALLBACK_OPS_LIST"; fallback_config_name = ".dipu_force_fallback_op_list.config" -// specified_autocompare_env_name = "SPECIFIED_AUTOCOMPARE_OPS_LIST"; specified_autocompare_config_name = ".specified_autocompare_op_list.config" +// fallback_env_name = "DIPU_FORCE_FALLBACK_OPS_LIST"; fallback_config_name = +// ".dipu_force_fallback_op_list.config" specified_autocompare_env_name = +// "SPECIFIED_AUTOCOMPARE_OPS_LIST"; specified_autocompare_config_name = +// 
".specified_autocompare_op_list.config" namespace dipu { -std::vector loadMatcher(const char* env_name, const char* config_name) { +std::vector loadMatcher(const char* env_name, + const char* config_name) { auto append = [](std::istream& input, std::vector& output) { auto constexpr separator = ','; @@ -73,25 +76,33 @@ bool whetherGlobalAutocompare() { return false; } - std::cerr << "Error: USE_GLOBAL_AUTOCOMPARE can only be set to 'ON' or 'OFF'.\n"; + std::cerr + << "Error: USE_GLOBAL_AUTOCOMPARE can only be set to 'ON' or 'OFF'.\n"; return false; } -// Whether to enable AutoCompare is based on USE_GLOBAL_AUTOCOMPARE and SPECIFIED_AUTOCOMPARE_OPS_LIST -bool whetherAutoCompare(const char* opname, std::vector autocompareMatchers) { - // if USE_GLOBAL_AUTOCOMPARE is true, global autocompare is enabled regardless the value of SPECIFIED_AUTOCOMPARE_OPS_LIST +// Whether to enable AutoCompare is based on USE_GLOBAL_AUTOCOMPARE and +// SPECIFIED_AUTOCOMPARE_OPS_LIST +bool whetherAutoCompare(const char* opname, + std::vector autocompareMatchers) { + // if USE_GLOBAL_AUTOCOMPARE is true, global autocompare is enabled regardless + // the value of SPECIFIED_AUTOCOMPARE_OPS_LIST if (whetherGlobalAutocompare()) { return true; - } - // else if opname in SPECIFIED_AUTOCOMPARE_OPS_LIST, the specified op will be autocomapred - return whetherOpMatch(opname, autocompareMatchers); } -} // end of namespace dipu + // else if opname in SPECIFIED_AUTOCOMPARE_OPS_LIST, the specified op will be + // autocomapred + return whetherOpMatch(opname, autocompareMatchers); +} +} // end of namespace dipu -const char* fallback_env_name = "DIPU_FORCE_FALLBACK_OPS_LIST"; -const char* fallback_config_name = ".dipu_force_fallback_op_list.config"; -std::vector fallbackMatchers = dipu::loadMatcher(fallback_env_name, fallback_config_name); +const char* fallback_env_name = "DIPU_FORCE_FALLBACK_OPS_LIST"; +const char* fallback_config_name = ".dipu_force_fallback_op_list.config"; +std::vector fallbackMatchers = 
+ dipu::loadMatcher(fallback_env_name, fallback_config_name); -const char* specified_autocompare_env_name = "SPECIFIED_AUTOCOMPARE_OPS_LIST"; -const char* specified_autocompare_config_name = ".specified_autocompare_op_list.config"; -std::vector autocompareMatchers = dipu::loadMatcher(specified_autocompare_env_name, specified_autocompare_config_name); +const char* specified_autocompare_env_name = "SPECIFIED_AUTOCOMPARE_OPS_LIST"; +const char* specified_autocompare_config_name = + ".specified_autocompare_op_list.config"; +std::vector autocompareMatchers = dipu::loadMatcher( + specified_autocompare_env_name, specified_autocompare_config_name); diff --git a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp index b6aacf361..4c4c3e643 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp +++ b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp @@ -1,21 +1,23 @@ +#include + #include #include #include #include -#include - -namespace dipu{ -std::vector loadMatcher(const char* env_name, const char* config_name); +namespace dipu { +std::vector loadMatcher(const char* env_name, + const char* config_name); bool whetherOpMatch(const char* opname, std::vector regexMatchers); bool whetherGlobalAutocompare(); -bool whetherAutoCompare(const char* opname, std::vector autocompareMatchers); -} +bool whetherAutoCompare(const char* opname, + std::vector autocompareMatchers); +} // namespace dipu -extern const char* fallback_env_name; -extern const char* fallback_config_name; +extern const char* fallback_env_name; +extern const char* fallback_config_name; extern std::vector fallbackMatchers; -extern const char* specified_autocompare_env_name; +extern const char* specified_autocompare_env_name; extern const char* specified_autocompare_config_name; extern std::vector autocompareMatchers; From 8e95f2fb84f19fd7bab326e45d403f8479cff461 Mon Sep 17 00:00:00 2001 From: NeosZhang Date: Fri, 19 Apr 2024 15:38:28 +0800 Subject: 
[PATCH 10/29] fix --- dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp | 11 ++++++----- dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp | 9 +++++---- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp index ca490a1df..47688810e 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp +++ b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp @@ -48,7 +48,8 @@ std::vector loadMatcher(const char* env_name, return list; } -bool whetherOpMatch(const char* opname, std::vector regexMatchers) { +bool whetherOpMatch(const char* opname, + const std::vector& regexMatchers) { if (regexMatchers.empty() || opname == nullptr) { return false; } @@ -59,7 +60,7 @@ bool whetherOpMatch(const char* opname, std::vector regexMatchers) { } bool whetherGlobalAutocompare() { - static const char* globalAutocompare = std::getenv("USE_GLOBAL_AUTOCOMPARE"); + const char* globalAutocompare = std::getenv("USE_GLOBAL_AUTOCOMPARE"); if (globalAutocompare == nullptr) { return false; } @@ -84,7 +85,7 @@ bool whetherGlobalAutocompare() { // Whether to enable AutoCompare is based on USE_GLOBAL_AUTOCOMPARE and // SPECIFIED_AUTOCOMPARE_OPS_LIST bool whetherAutoCompare(const char* opname, - std::vector autocompareMatchers) { + const std::vector& autocompareMatchers) { // if USE_GLOBAL_AUTOCOMPARE is true, global autocompare is enabled regardless // the value of SPECIFIED_AUTOCOMPARE_OPS_LIST if (whetherGlobalAutocompare()) { @@ -98,11 +99,11 @@ bool whetherAutoCompare(const char* opname, const char* fallback_env_name = "DIPU_FORCE_FALLBACK_OPS_LIST"; const char* fallback_config_name = ".dipu_force_fallback_op_list.config"; -std::vector fallbackMatchers = +const std::vector fallbackMatchers = dipu::loadMatcher(fallback_env_name, fallback_config_name); const char* specified_autocompare_env_name = "SPECIFIED_AUTOCOMPARE_OPS_LIST"; const char* specified_autocompare_config_name = 
".specified_autocompare_op_list.config"; -std::vector autocompareMatchers = dipu::loadMatcher( +const std::vector autocompareMatchers = dipu::loadMatcher( specified_autocompare_env_name, specified_autocompare_config_name); diff --git a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp index 4c4c3e643..bf28448e6 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp +++ b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp @@ -8,16 +8,17 @@ namespace dipu { std::vector loadMatcher(const char* env_name, const char* config_name); -bool whetherOpMatch(const char* opname, std::vector regexMatchers); +bool whetherOpMatch(const char* opname, + const std::vector& regexMatchers); bool whetherGlobalAutocompare(); bool whetherAutoCompare(const char* opname, - std::vector autocompareMatchers); + const std::vector& autocompareMatchers); } // namespace dipu extern const char* fallback_env_name; extern const char* fallback_config_name; -extern std::vector fallbackMatchers; +extern const std::vector fallbackMatchers; extern const char* specified_autocompare_env_name; extern const char* specified_autocompare_config_name; -extern std::vector autocompareMatchers; +extern const std::vector autocompareMatchers; From 1ba0e04e317c3a932b5a06725fb95a3a3852b6a6 Mon Sep 17 00:00:00 2001 From: NeosZhang Date: Fri, 19 Apr 2024 15:57:09 +0800 Subject: [PATCH 11/29] rm autcompare CI --- .github/workflows/main.yml | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index db8e01997..9f31ddf6d 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -353,21 +353,6 @@ jobs: source scripts/ci/ascend/ci_ascend_env.sh bash scripts/ci/ascend/ci_ascend_script.sh build_dipu \ || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 ) - - Build-Ascend-910b-with-autocompare: - name: Build-dipu-ascend-910b-with-autocompare - needs: 
[Build-PyTorch-For-Ascend-910b] - runs-on: tps-ascend-ci-910b - steps: - - name: Build dipu - run: | - set -ex - export USE_COVERAGE=ON - export USE_AUTOCOMPARE=ON - cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && cp -R source ${GITHUB_JOB} && cd ${GITHUB_JOB}/dipu - source scripts/ci/ascend/ci_ascend_env.sh - bash scripts/ci/ascend/ci_ascend_script.sh build_dipu \ - || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 ) Test-Ascend-910b: name: Test-dipu-ascend-910b From f27200b0c94f85175aaf02e3990f50a0a59dd2d8 Mon Sep 17 00:00:00 2001 From: NeosZhang Date: Fri, 19 Apr 2024 16:44:53 +0800 Subject: [PATCH 12/29] add copyright --- dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.cpp | 3 ++- dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp | 12 ++++++------ dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp | 5 +++-- dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp | 1 + 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.cpp b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.cpp index 639965e1f..4b3f9e7f2 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.cpp +++ b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.cpp @@ -1,4 +1,6 @@ // Copyright (c) 2023, DeepLink. 
+#include "RegisterDIPU.hpp" + #include #include #include @@ -11,7 +13,6 @@ #include #include -#include "RegisterDIPU.hpp" #include "csrc_dipu/aten/DIPUATenFunctions.h" #include "csrc_dipu/base/basedef.h" #include "csrc_dipu/profiler/profiler.h" diff --git a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp index 27dbe7e9f..9ad1a024f 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp +++ b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp @@ -62,8 +62,8 @@ void dipu_fallback(const c10::OperatorHandle& op, DispatchKeySet dispatch_keys, } else { \ DIPU_OP_LOG_WARNING_ONCE("force fallback has been set, "); \ } \ - DIPU_OP_LOG_WARNING_ONCE((opname) \ - << " will be fallback to cpu" << "\n"); \ + DIPU_OP_LOG_WARNING_ONCE((opname) << " will be fallback to cpu" \ + << "\n"); \ } \ } while (false); @@ -78,8 +78,8 @@ void dipu_fallback(const c10::OperatorHandle& op, DispatchKeySet dispatch_keys, } else { \ DIPU_OP_LOG_WARNING_ONCE("force fallback has been set, "); \ } \ - DIPU_OP_LOG_WARNING_ONCE((opname) \ - << " will be fallback to cpu" << "\n"); \ + DIPU_OP_LOG_WARNING_ONCE((opname) << " will be fallback to cpu" \ + << "\n"); \ } \ } while (false); @@ -101,8 +101,8 @@ void dipu_fallback(const c10::OperatorHandle& op, DispatchKeySet dispatch_keys, } else { \ DIPU_OP_LOG_WARNING_ONCE("force fallback has been set, "); \ } \ - DIPU_OP_LOG_WARNING_ONCE((opname) \ - << " will be fallback to cpu" << "\n"); \ + DIPU_OP_LOG_WARNING_ONCE((opname) << " will be fallback to cpu" \ + << "\n"); \ m.impl(opname, TORCH_FN(custom_fallback_func)); \ } \ } while (false); diff --git a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp index 47688810e..2fc1a143e 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp +++ b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp @@ -1,3 +1,6 @@ +// Copyright (c) 2024, DeepLink. 
+#include "OpRegexMatch.hpp" + #include #include @@ -6,8 +9,6 @@ #include #include -#include "OpRegexMatch.hpp" - // loadMatcher is used to get regex matcher from env_name and config // fallback_env_name = "DIPU_FORCE_FALLBACK_OPS_LIST"; fallback_config_name = // ".dipu_force_fallback_op_list.config" specified_autocompare_env_name = diff --git a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp index bf28448e6..34e45f70e 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp +++ b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp @@ -1,3 +1,4 @@ +// Copyright (c) 2024, DeepLink. #include #include From e534ce234f73378ad1026cc9e534a99c9d5310e1 Mon Sep 17 00:00:00 2001 From: NeosZhang Date: Fri, 19 Apr 2024 16:51:14 +0800 Subject: [PATCH 13/29] fix clang-format --- dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.cpp | 10 +++++----- dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp | 4 ++-- dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp | 4 ++-- dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp | 4 ++-- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.cpp b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.cpp index 4b3f9e7f2..c99cac94e 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.cpp +++ b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.cpp @@ -1,6 +1,11 @@ // Copyright (c) 2023, DeepLink. 
#include "RegisterDIPU.hpp" +#include +#include +#include +#include + #include #include #include @@ -8,11 +13,6 @@ #include #include -#include -#include -#include -#include - #include "csrc_dipu/aten/DIPUATenFunctions.h" #include "csrc_dipu/base/basedef.h" #include "csrc_dipu/profiler/profiler.h" diff --git a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp index 9ad1a024f..9205d5ab3 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp +++ b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp @@ -1,13 +1,13 @@ // Copyright (c) 2023, DeepLink. #pragma once -#include - #include #include #include #include +#include + #include "csrc_dipu/aten/ops/OpRegexMatch.hpp" #include "csrc_dipu/aten/ops/OpUtils.hpp" diff --git a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp index 2fc1a143e..26d9ce693 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp +++ b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp @@ -1,14 +1,14 @@ // Copyright (c) 2024, DeepLink. #include "OpRegexMatch.hpp" -#include - #include #include #include #include #include +#include + // loadMatcher is used to get regex matcher from env_name and config // fallback_env_name = "DIPU_FORCE_FALLBACK_OPS_LIST"; fallback_config_name = // ".dipu_force_fallback_op_list.config" specified_autocompare_env_name = diff --git a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp index 34e45f70e..c6025a767 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp +++ b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp @@ -1,11 +1,11 @@ // Copyright (c) 2024, DeepLink. 
-#include - #include #include #include #include +#include + namespace dipu { std::vector loadMatcher(const char* env_name, const char* config_name); From f2d7ff972cf3f97a9e0a5d9405a35f180ac26811 Mon Sep 17 00:00:00 2001 From: NeosZhang Date: Fri, 19 Apr 2024 17:20:36 +0800 Subject: [PATCH 14/29] fix clang-tidy --- dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp | 8 ++++---- dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp index 26d9ce693..d3dcc7688 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp +++ b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp @@ -98,13 +98,13 @@ bool whetherAutoCompare(const char* opname, } } // end of namespace dipu -const char* fallback_env_name = "DIPU_FORCE_FALLBACK_OPS_LIST"; -const char* fallback_config_name = ".dipu_force_fallback_op_list.config"; +const char* const fallback_env_name = "DIPU_FORCE_FALLBACK_OPS_LIST"; +const char* const fallback_config_name = ".dipu_force_fallback_op_list.config"; const std::vector fallbackMatchers = dipu::loadMatcher(fallback_env_name, fallback_config_name); -const char* specified_autocompare_env_name = "SPECIFIED_AUTOCOMPARE_OPS_LIST"; -const char* specified_autocompare_config_name = +const char* const specified_autocompare_env_name = "SPECIFIED_AUTOCOMPARE_OPS_LIST"; +const char* const specified_autocompare_config_name = ".specified_autocompare_op_list.config"; const std::vector autocompareMatchers = dipu::loadMatcher( specified_autocompare_env_name, specified_autocompare_config_name); diff --git a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp index c6025a767..fd6c8f158 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp +++ b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp @@ -16,10 +16,10 @@ bool 
whetherAutoCompare(const char* opname, const std::vector& autocompareMatchers); } // namespace dipu -extern const char* fallback_env_name; -extern const char* fallback_config_name; +extern const char* const fallback_env_name; +extern const char* const fallback_config_name; extern const std::vector fallbackMatchers; -extern const char* specified_autocompare_env_name; -extern const char* specified_autocompare_config_name; +extern const char* const specified_autocompare_env_name; +extern const char* const specified_autocompare_config_name; extern const std::vector autocompareMatchers; From 5598c1ba7427ef2e270fbd3e475480dffac3fcc4 Mon Sep 17 00:00:00 2001 From: NeosZhang Date: Mon, 22 Apr 2024 10:46:03 +0800 Subject: [PATCH 15/29] fix lint --- dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp index d3dcc7688..9a199873a 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp +++ b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp @@ -103,7 +103,8 @@ const char* const fallback_config_name = ".dipu_force_fallback_op_list.config"; const std::vector fallbackMatchers = dipu::loadMatcher(fallback_env_name, fallback_config_name); -const char* const specified_autocompare_env_name = "SPECIFIED_AUTOCOMPARE_OPS_LIST"; +const char* const specified_autocompare_env_name = + "SPECIFIED_AUTOCOMPARE_OPS_LIST"; const char* const specified_autocompare_config_name = ".specified_autocompare_op_list.config"; const std::vector autocompareMatchers = dipu::loadMatcher( From 3c63c9fa00325c777ff818641f9a3067400baba8 Mon Sep 17 00:00:00 2001 From: ZhangQiu <100055343+NeosZhang@users.noreply.github.com> Date: Mon, 22 Apr 2024 12:39:47 +0800 Subject: [PATCH 16/29] Update dipu/QuickStart.md Co-authored-by: Lingjie --- dipu/QuickStart.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/dipu/QuickStart.md b/dipu/QuickStart.md index 40f197ee8..5cc613c14 100644 --- a/dipu/QuickStart.md +++ b/dipu/QuickStart.md @@ -187,7 +187,7 @@ python -c "import torch_dipu" 算子精度自动对比功能用于确保算子计算结果的正确性,通过将设备参数拷贝到CPU上,对比CPU和设备的计算结果来判断精度是否达标。以下是算子精度自动对比功能的使用例子: ```shell -$ unset DIPU_FORCE_FALLBACK_OPS_LIST # 主要是确保要比较的算子没有强制 fallback到CPU, 可选 +$ unset DIPU_FORCE_FALLBACK_OPS_LIST # 主要是确保要比较的算子没有强制 fallback 到 CPU, 可选 $ python >>> import torch >>> import torch_dipu From 5cdba1a56861bf3fc326409b7358f155095f40f0 Mon Sep 17 00:00:00 2001 From: NeosZhang Date: Mon, 22 Apr 2024 17:38:26 +0800 Subject: [PATCH 17/29] remove ENV USE_GLOBAL_AUTOCOMPARE --- dipu/QuickStart.md | 18 +++--- .../autogen_diopi_wrapper.py | 6 +- .../csrc_dipu/aten/RegisterDIPU.hpp | 64 +++++++++---------- .../csrc_dipu/aten/ops/OpRegexMatch.cpp | 53 +++------------ .../csrc_dipu/aten/ops/OpRegexMatch.hpp | 7 +- 5 files changed, 56 insertions(+), 92 deletions(-) diff --git a/dipu/QuickStart.md b/dipu/QuickStart.md index 40f197ee8..38469b774 100644 --- a/dipu/QuickStart.md +++ b/dipu/QuickStart.md @@ -184,10 +184,11 @@ python -c "import torch_dipu" #### 算子精度自动对比功能 -算子精度自动对比功能用于确保算子计算结果的正确性,通过将设备参数拷贝到CPU上,对比CPU和设备的计算结果来判断精度是否达标。以下是算子精度自动对比功能的使用例子: +算子精度自动对比功能(autocompare)用于确保算子计算结果的正确性,通过将设备参数拷贝到CPU上,对比CPU和设备的计算结果来判断精度是否达标。以下是算子精度自动对比功能的使用例子: ```shell $ unset DIPU_FORCE_FALLBACK_OPS_LIST # 主要是确保要比较的算子没有强制 fallback到CPU, 可选 +$ export DIPU_AUTOCOMPARE_OPS_LIST=add.out # 对add.out算子开启autocompare功能 $ python >>> import torch >>> import torch_dipu @@ -217,21 +218,22 @@ autocompare: add.out other: allclose ##### 算子精度自动对比功能的设置 -算子精度自动对比功能默认不开启,可以设置环境变量`USE_GLOBAL_AUTOCOMPARE`和`SPECIFIED_AUTOCOMPARE_OPS_LIST`来控制该功能,在开启算子自动对比功能前,必须unset `DIPU_FORCE_FALLBACK_OPS_LIST` +算子精度自动对比功能默认不开启,可以设置环境变量`DIPU_AUTOCOMPARE_OPS_LIST`来控制该功能,在开启算子自动对比功能前,必须unset `DIPU_FORCE_FALLBACK_OPS_LIST` -- 可以通过设置环境变量`USE_GLOBAL_AUTOCOMPARE=ON`,开启全局的精度对比,这种情况下所有调用的算子都会进行精度对比,也可以设置为OFF来关闭所有算子的精度自动对比功能 +- 
可以通过设置环境变量`DIPU_AUTOCOMPARE_OPS_LIST='.*'`,开启全局的精度对比,这种情况下所有调用的算子都会进行精度对比。 ```shell # 开启全局的算子精度自动对比功能 -export USE_GLOBAL_AUTOCOMPARE=ON +export DIPU_AUTOCOMPARE_OPS_LIST='.*' ``` -- 在未开启`USE_GLOBAL_AUTOCOMPARE`的前提下,可以设置`SPECIFIED_AUTOCOMPARE_OPS_LIST`来指定算子开启自动精度对比,支持正则表达式匹配。算子名可以参考[diopi_functions.yaml](https://github.com/DeepLink-org/deeplink.framework/blob/main/dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml)。 +- 可以设置`DIPU_AUTOCOMPARE_OPS_LIST`来指定算子开启自动精度对比,支持正则表达式匹配,也可以指定多个算子开启自动精度对比。算子名可以参考[diopi_functions.yaml](https://github.com/DeepLink-org/deeplink.framework/blob/main/dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml)。 ```shell -# 关闭全局的算子精度自动对比功能,并指定add.*?算子进行对比 -export USE_GLOBAL_AUTOCOMPARE=OFF -export SPECIFIED_AUTOCOMPARE_OPS_LIST=add.*? +# 指定匹配add.*?的算子进行自动精度对比 +export DIPU_AUTOCOMPARE_OPS_LIST=add.*? +# 指定add.out、sub.out算子进行自动精度对比 +export DIPU_AUTOCOMPARE_OPS_LIST="add.out, sub.out" ``` NOTE: diff --git a/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py b/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py index ae1cf32cb..b4bbca03d 100644 --- a/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py +++ b/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py @@ -947,6 +947,7 @@ def functions_code_gen(fun_config): fbody += autocompare_code # generate the OP_register code + # case 1: custom_fallback=False and autocompare not disabled if fun_config.get("custom_fallback", False) in ["False", False] and fun_config.get( "autocompare", True ) in ["True", True]: @@ -960,6 +961,7 @@ def functions_code_gen(fun_config): ], ) + # case2: custom_fallback=False and autocompare=disable elif fun_config.get("custom_fallback", False) in [ "False", False, @@ -973,8 +975,8 @@ def functions_code_gen(fun_config): ) ], ) - - else: + # case3: custom_fallback=True + elif fun_config.get("custom_fallback", False) in ["True", True]: register_body = op_with_custom_fallback_register_template.substitute( 
register_name=[get_op_name_from_schema(fun_config["schema"])], aten_fun_name=["dipu::native::" + fun_name], diff --git a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp index 9205d5ab3..ac3647d26 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp +++ b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp @@ -46,41 +46,36 @@ void dipu_fallback(const c10::OperatorHandle& op, DispatchKeySet dispatch_keys, // It mat be necessary to determine whether to keep torchop default impl // for non-custom ops through function dipuKeepTorchopDefaultImpl firstly in the // future, and we use force fallback to keep torchop default impl now. -#define addAutoCompare(wrapperFunc) wrapperFunc##_autocompare -#define DIOPI_ATEN_FUNC(opname, diopiFunc, wrapperFunc) \ - do { \ - if ((reinterpret_cast(diopiFunc) != nullptr) && \ - (!dipu::whetherOpMatch(opname, fallbackMatchers))) { \ - if (dipu::whetherAutoCompare(opname, autocompareMatchers)) { \ - m.impl(opname, TORCH_FN(addAutoCompare(wrapperFunc))); \ - } else { \ - m.impl(opname, TORCH_FN(wrapperFunc)); \ - } \ - } else { \ - if ((reinterpret_cast(diopiFunc) == nullptr)) { \ - DIPU_OP_LOG_WARNING_ONCE(#diopiFunc << " is not yet implemented, "); \ - } else { \ - DIPU_OP_LOG_WARNING_ONCE("force fallback has been set, "); \ - } \ - DIPU_OP_LOG_WARNING_ONCE((opname) << " will be fallback to cpu" \ - << "\n"); \ - } \ +#define DIOPI_ATEN_FUNC(opname, diopiFunc, wrapperFunc) \ + do { \ + if ((reinterpret_cast(diopiFunc) != nullptr) && \ + (!dipu::opRegexMatch::whetherOpMatch( \ + opname, dipu::opRegexMatch::fallbackMatchers))) { \ + if (dipu::opRegexMatch::whetherOpMatch( \ + opname, dipu::opRegexMatch::autocompareMatchers)) { \ + m.impl(opname, TORCH_FN(wrapperFunc##_autocompare)); \ + } else { \ + m.impl(opname, TORCH_FN(wrapperFunc)); \ + } \ + } else { \ + DIPU_OP_LOG_WARNING_ONCE(#diopiFunc << " is not yet implemented, "); \ + \ + DIPU_OP_LOG_WARNING_ONCE((opname) << " will be 
fallback to cpu" \ + << "\n"); \ + } \ } while (false); -#define DIOPI_ATEN_FUNC_DISABLE_AUTOCOMPARE(opname, diopiFunc, wrapperFunc) \ - do { \ - if ((reinterpret_cast(diopiFunc) != nullptr) && \ - (!dipu::whetherOpMatch(opname, fallbackMatchers))) { \ - m.impl(opname, TORCH_FN(wrapperFunc)); \ - } else { \ - if ((reinterpret_cast(diopiFunc) == nullptr)) { \ - DIPU_OP_LOG_WARNING_ONCE(#diopiFunc << " is not yet implemented, "); \ - } else { \ - DIPU_OP_LOG_WARNING_ONCE("force fallback has been set, "); \ - } \ - DIPU_OP_LOG_WARNING_ONCE((opname) << " will be fallback to cpu" \ - << "\n"); \ - } \ +#define DIOPI_ATEN_FUNC_DISABLE_AUTOCOMPARE(opname, diopiFunc, wrapperFunc) \ + do { \ + if ((reinterpret_cast(diopiFunc) != nullptr) && \ + (!dipu::opRegexMatch::whetherOpMatch( \ + opname, dipu::opRegexMatch::fallbackMatchers))) { \ + m.impl(opname, TORCH_FN(wrapperFunc)); \ + } else { \ + DIPU_OP_LOG_WARNING_ONCE(#diopiFunc << " is not yet implemented, "); \ + DIPU_OP_LOG_WARNING_ONCE((opname) << " will be fallback to cpu" \ + << "\n"); \ + } \ } while (false); // Determine whether to keep torchop default impl for custom ops through @@ -93,7 +88,8 @@ void dipu_fallback(const c10::OperatorHandle& op, DispatchKeySet dispatch_keys, } \ if ((reinterpret_cast(diopi_func) != nullptr) && \ !((force_fallback) || \ - dipu::whetherOpMatch(opname, fallbackMatchers))) { \ + dipu::opRegexMatch::whetherOpMatch( \ + opname, dipu::opRegexMatch::fallbackMatchers))) { \ m.impl(opname, TORCH_FN(wrapper_func)); \ } else { \ if ((reinterpret_cast(diopi_func) == nullptr)) { \ diff --git a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp index 9a199873a..4d6f40b8d 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp +++ b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp @@ -12,10 +12,11 @@ // loadMatcher is used to get regex matcher from env_name and config // fallback_env_name = "DIPU_FORCE_FALLBACK_OPS_LIST"; 
fallback_config_name = // ".dipu_force_fallback_op_list.config" specified_autocompare_env_name = -// "SPECIFIED_AUTOCOMPARE_OPS_LIST"; specified_autocompare_config_name = +// "DIPU_AUTOCOMPARE_OPS_LIST"; specified_autocompare_config_name = // ".specified_autocompare_op_list.config" namespace dipu { +namespace opRegexMatch { std::vector loadMatcher(const char* env_name, const char* config_name) { auto append = [](std::istream& input, std::vector& output) { @@ -60,52 +61,16 @@ bool whetherOpMatch(const char* opname, [&opname](auto& matcher) { return std::regex_match(opname, matcher); }); } -bool whetherGlobalAutocompare() { - const char* globalAutocompare = std::getenv("USE_GLOBAL_AUTOCOMPARE"); - if (globalAutocompare == nullptr) { - return false; - } - - std::string globalAutocompareStr(globalAutocompare); - for (char& c : globalAutocompareStr) { - c = static_cast(std::tolower(static_cast(c))); - } - - if (globalAutocompareStr == "on") { - return true; - } - if (globalAutocompareStr == "off") { - return false; - } - - std::cerr - << "Error: USE_GLOBAL_AUTOCOMPARE can only be set to 'ON' or 'OFF'.\n"; - return false; -} - -// Whether to enable AutoCompare is based on USE_GLOBAL_AUTOCOMPARE and -// SPECIFIED_AUTOCOMPARE_OPS_LIST -bool whetherAutoCompare(const char* opname, - const std::vector& autocompareMatchers) { - // if USE_GLOBAL_AUTOCOMPARE is true, global autocompare is enabled regardless - // the value of SPECIFIED_AUTOCOMPARE_OPS_LIST - if (whetherGlobalAutocompare()) { - return true; - } - // else if opname in SPECIFIED_AUTOCOMPARE_OPS_LIST, the specified op will be - // autocomapred - return whetherOpMatch(opname, autocompareMatchers); -} -} // end of namespace dipu - const char* const fallback_env_name = "DIPU_FORCE_FALLBACK_OPS_LIST"; const char* const fallback_config_name = ".dipu_force_fallback_op_list.config"; const std::vector fallbackMatchers = - dipu::loadMatcher(fallback_env_name, fallback_config_name); + 
dipu::opRegexMatch::loadMatcher(fallback_env_name, fallback_config_name); -const char* const specified_autocompare_env_name = - "SPECIFIED_AUTOCOMPARE_OPS_LIST"; +const char* const specified_autocompare_env_name = "DIPU_AUTOCOMPARE_OPS_LIST"; const char* const specified_autocompare_config_name = ".specified_autocompare_op_list.config"; -const std::vector autocompareMatchers = dipu::loadMatcher( - specified_autocompare_env_name, specified_autocompare_config_name); +const std::vector autocompareMatchers = + dipu::opRegexMatch::loadMatcher(specified_autocompare_env_name, + specified_autocompare_config_name); +} // namespace opRegexMatch +} // namespace dipu diff --git a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp index fd6c8f158..d84815604 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp +++ b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp @@ -7,14 +7,11 @@ #include namespace dipu { +namespace opRegexMatch { std::vector loadMatcher(const char* env_name, const char* config_name); bool whetherOpMatch(const char* opname, const std::vector& regexMatchers); -bool whetherGlobalAutocompare(); -bool whetherAutoCompare(const char* opname, - const std::vector& autocompareMatchers); -} // namespace dipu extern const char* const fallback_env_name; extern const char* const fallback_config_name; @@ -23,3 +20,5 @@ extern const std::vector fallbackMatchers; extern const char* const specified_autocompare_env_name; extern const char* const specified_autocompare_config_name; extern const std::vector autocompareMatchers; +} // namespace opRegexMatch +} // namespace dipu From 458233c88380a862d19d749c69837916675ee49e Mon Sep 17 00:00:00 2001 From: NeosZhang Date: Mon, 22 Apr 2024 18:41:11 +0800 Subject: [PATCH 18/29] fix --- .../csrc_dipu/aten/RegisterDIPU.hpp | 63 ++++++++++--------- 1 file changed, 35 insertions(+), 28 deletions(-) diff --git a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp 
b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp index ac3647d26..e573d9f2e 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp +++ b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp @@ -46,36 +46,43 @@ void dipu_fallback(const c10::OperatorHandle& op, DispatchKeySet dispatch_keys, // It mat be necessary to determine whether to keep torchop default impl // for non-custom ops through function dipuKeepTorchopDefaultImpl firstly in the // future, and we use force fallback to keep torchop default impl now. -#define DIOPI_ATEN_FUNC(opname, diopiFunc, wrapperFunc) \ - do { \ - if ((reinterpret_cast(diopiFunc) != nullptr) && \ - (!dipu::opRegexMatch::whetherOpMatch( \ - opname, dipu::opRegexMatch::fallbackMatchers))) { \ - if (dipu::opRegexMatch::whetherOpMatch( \ - opname, dipu::opRegexMatch::autocompareMatchers)) { \ - m.impl(opname, TORCH_FN(wrapperFunc##_autocompare)); \ - } else { \ - m.impl(opname, TORCH_FN(wrapperFunc)); \ - } \ - } else { \ - DIPU_OP_LOG_WARNING_ONCE(#diopiFunc << " is not yet implemented, "); \ - \ - DIPU_OP_LOG_WARNING_ONCE((opname) << " will be fallback to cpu" \ - << "\n"); \ - } \ +#define DIOPI_ATEN_FUNC(opname, diopiFunc, wrapperFunc) \ + do { \ + if ((reinterpret_cast(diopiFunc) != nullptr) && \ + (!dipu::opRegexMatch::whetherOpMatch( \ + opname, dipu::opRegexMatch::fallbackMatchers))) { \ + if (dipu::opRegexMatch::whetherOpMatch( \ + opname, dipu::opRegexMatch::autocompareMatchers)) { \ + m.impl(opname, TORCH_FN(wrapperFunc##_autocompare)); \ + } else { \ + m.impl(opname, TORCH_FN(wrapperFunc)); \ + } \ + } else { \ + if ((reinterpret_cast(diopiFunc) == nullptr)) { \ + DIPU_OP_LOG_WARNING_ONCE(#diopiFunc << " is not yet implemented, "); \ + } else { \ + DIPU_OP_LOG_WARNING_ONCE("force fallback has been set, "); \ + } \ + DIPU_OP_LOG_WARNING_ONCE((opname) << " will be fallback to cpu" \ + << "\n"); \ + } \ } while (false); -#define DIOPI_ATEN_FUNC_DISABLE_AUTOCOMPARE(opname, diopiFunc, wrapperFunc) \ - do { \ - if 
((reinterpret_cast(diopiFunc) != nullptr) && \ - (!dipu::opRegexMatch::whetherOpMatch( \ - opname, dipu::opRegexMatch::fallbackMatchers))) { \ - m.impl(opname, TORCH_FN(wrapperFunc)); \ - } else { \ - DIPU_OP_LOG_WARNING_ONCE(#diopiFunc << " is not yet implemented, "); \ - DIPU_OP_LOG_WARNING_ONCE((opname) << " will be fallback to cpu" \ - << "\n"); \ - } \ +#define DIOPI_ATEN_FUNC_DISABLE_AUTOCOMPARE(opname, diopiFunc, wrapperFunc) \ + do { \ + if ((reinterpret_cast(diopiFunc) != nullptr) && \ + (!dipu::opRegexMatch::whetherOpMatch( \ + opname, dipu::opRegexMatch::fallbackMatchers))) { \ + m.impl(opname, TORCH_FN(wrapperFunc)); \ + } else { \ + if ((reinterpret_cast(diopiFunc) == nullptr)) { \ + DIPU_OP_LOG_WARNING_ONCE(#diopiFunc << " is not yet implemented, "); \ + } else { \ + DIPU_OP_LOG_WARNING_ONCE("force fallback has been set, "); \ + } \ + DIPU_OP_LOG_WARNING_ONCE((opname) << " will be fallback to cpu" \ + << "\n"); \ + } \ } while (false); // Determine whether to keep torchop default impl for custom ops through From 389a2678925ba4f00a03d44d66d447c9e8b77a56 Mon Sep 17 00:00:00 2001 From: NeosZhang Date: Tue, 23 Apr 2024 10:15:26 +0800 Subject: [PATCH 19/29] fix --- dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp index e573d9f2e..c4dbfb300 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp +++ b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp @@ -1,6 +1,9 @@ // Copyright (c) 2023, DeepLink. 
#pragma once +#include +#include // for printf +#include // for std::getenv #include #include #include From c8dbb34b779448f01c30883d6117d2e32421536d Mon Sep 17 00:00:00 2001 From: NeosZhang Date: Tue, 23 Apr 2024 16:54:44 +0800 Subject: [PATCH 20/29] fix readme --- dipu/QuickStart.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dipu/QuickStart.md b/dipu/QuickStart.md index b7831b375..e2510d377 100644 --- a/dipu/QuickStart.md +++ b/dipu/QuickStart.md @@ -220,11 +220,11 @@ autocompare: add.out other: allclose 算子精度自动对比功能默认不开启,可以设置环境变量`DIPU_AUTOCOMPARE_OPS_LIST`来控制该功能,在开启算子自动对比功能前,必须unset `DIPU_FORCE_FALLBACK_OPS_LIST` -- 可以通过设置环境变量`SPECIFIED_AUTOCOMPARE_OPS_LIST=‘.*’`,开启全局的精度对比,这种情况下所有调用的算子都会进行精度对比。 +- 可以通过设置环境变量`DIPU_AUTOCOMPARE_OPS_LIST=‘.*’`,开启全局的精度对比,这种情况下所有调用的算子都会进行精度对比。 ```shell # 开启全局的算子精度自动对比功能 -export SPECIFIED_AUTOCOMPARE_OPS_LIST=‘.*’ +export DIPU_AUTOCOMPARE_OPS_LIST=‘.*’ ``` - 可以设置`DIPU_AUTOCOMPARE_OPS_LIST`来指定算子开启自动精度对比,支持正则表达式匹配,也可以指定多个算子开启自动精度对比。算子名可以参考[diopi_functions.yaml](https://github.com/DeepLink-org/deeplink.framework/blob/main/dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml)。 From 8d40cf978adbf4f0372acf9928665ce42bfd36d8 Mon Sep 17 00:00:00 2001 From: NeosZhang Date: Wed, 24 Apr 2024 11:31:21 +0800 Subject: [PATCH 21/29] fix --- dipu/QuickStart.md | 4 ++-- .../autogen_diopi_wrapper.py | 13 +++++------ dipu/scripts/ci/ascend/ci_ascend_script.sh | 6 ----- .../csrc_dipu/aten/RegisterDIPU.hpp | 23 +++++++++++-------- .../csrc_dipu/aten/ops/OpRegexMatch.cpp | 10 ++++---- .../csrc_dipu/aten/ops/OpRegexMatch.hpp | 6 ++--- 6 files changed, 30 insertions(+), 32 deletions(-) diff --git a/dipu/QuickStart.md b/dipu/QuickStart.md index e2510d377..7e19ee8d2 100644 --- a/dipu/QuickStart.md +++ b/dipu/QuickStart.md @@ -220,11 +220,11 @@ autocompare: add.out other: allclose 算子精度自动对比功能默认不开启,可以设置环境变量`DIPU_AUTOCOMPARE_OPS_LIST`来控制该功能,在开启算子自动对比功能前,必须unset `DIPU_FORCE_FALLBACK_OPS_LIST` -- 
可以通过设置环境变量`DIPU_AUTOCOMPARE_OPS_LIST=‘.*’`,开启全局的精度对比,这种情况下所有调用的算子都会进行精度对比。 +- 可以通过设置环境变量`DIPU_AUTOCOMPARE_OPS_LIST='.*'`,开启全局的精度对比,这种情况下所有调用的算子都会进行精度对比。 ```shell # 开启全局的算子精度自动对比功能 -export DIPU_AUTOCOMPARE_OPS_LIST=‘.*’ +export DIPU_AUTOCOMPARE_OPS_LIST='.*' ``` - 可以设置`DIPU_AUTOCOMPARE_OPS_LIST`来指定算子开启自动精度对比,支持正则表达式匹配,也可以指定多个算子开启自动精度对比。算子名可以参考[diopi_functions.yaml](https://github.com/DeepLink-org/deeplink.framework/blob/main/dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml)。 diff --git a/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py b/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py index b4bbca03d..604af956d 100644 --- a/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py +++ b/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py @@ -948,6 +948,7 @@ def functions_code_gen(fun_config): # generate the OP_register code # case 1: custom_fallback=False and autocompare not disabled + register_body = "" if fun_config.get("custom_fallback", False) in ["False", False] and fun_config.get( "autocompare", True ) in ["True", True]: @@ -975,8 +976,10 @@ def functions_code_gen(fun_config): ) ], ) - # case3: custom_fallback=True - elif fun_config.get("custom_fallback", False) in ["True", True]: + # case3: custom_fallback=True and autocompare not disable + elif fun_config.get("custom_fallback", False) in ["True", True] and fun_config.get( + "autocompare", True + ) in ["True", True]: register_body = op_with_custom_fallback_register_template.substitute( register_name=[get_op_name_from_schema(fun_config["schema"])], aten_fun_name=["dipu::native::" + fun_name], @@ -992,11 +995,7 @@ def functions_code_gen(fun_config): else "true" ) ], - fallbackFunc=[ - "dipu::native::" - + "custom_fallback_" - + fun_name.replace("_autocompare", "") - ], + fallbackFunc=["dipu::native::" + "custom_fallback_" + fun_name], ) return fbody, register_body diff --git a/dipu/scripts/ci/ascend/ci_ascend_script.sh b/dipu/scripts/ci/ascend/ci_ascend_script.sh index 
cdb3fc702..c1c986c71 100644 --- a/dipu/scripts/ci/ascend/ci_ascend_script.sh +++ b/dipu/scripts/ci/ascend/ci_ascend_script.sh @@ -12,9 +12,6 @@ function build_diopi_lib() { function config_dipu_ascend_cmake() { mkdir -p build && cd ./build cmake_args="-DCMAKE_BUILD_TYPE=Release -DDEVICE=ascend -DWITH_DIOPI_LIBRARY=DISABLE" - if [ -n "$USE_GLOBAL_AUTOCOMPARE" ]; then - cmake_args+=" -DUSE_GLOBAL_AUTOCOMPARE=${USE_GLOBAL_AUTOCOMPARE}" - fi cmake ../ $cmake_args cd ../ } @@ -22,9 +19,6 @@ function config_dipu_ascend_cmake() { function config_all_ascend_cmake() { mkdir -p build && cd ./build cmake_args="-DCMAKE_BUILD_TYPE=Release -DDEVICE=ascend -DENABLE_COVERAGE=${USE_COVERAGE} -DWITH_DIOPI=INTERNAL" - if [ -n "$USE_GLOBAL_AUTOCOMPARE" ]; then - cmake_args+=" -DUSE_GLOBAL_AUTOCOMPARE=${USE_GLOBAL_AUTOCOMPARE}" - fi cmake ../ $cmake_args cd ../ } diff --git a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp index c4dbfb300..431503672 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp +++ b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp @@ -52,10 +52,10 @@ void dipu_fallback(const c10::OperatorHandle& op, DispatchKeySet dispatch_keys, #define DIOPI_ATEN_FUNC(opname, diopiFunc, wrapperFunc) \ do { \ if ((reinterpret_cast(diopiFunc) != nullptr) && \ - (!dipu::opRegexMatch::whetherOpMatch( \ - opname, dipu::opRegexMatch::fallbackMatchers))) { \ - if (dipu::opRegexMatch::whetherOpMatch( \ - opname, dipu::opRegexMatch::autocompareMatchers)) { \ + (!dipu::op_regex_match::isOpMatch( \ + opname, dipu::op_regex_match::fallbackMatchers))) { \ + if (dipu::op_regex_match::isOpMatch( \ + opname, dipu::op_regex_match::autocompareMatchers)) { \ m.impl(opname, TORCH_FN(wrapperFunc##_autocompare)); \ } else { \ m.impl(opname, TORCH_FN(wrapperFunc)); \ @@ -74,8 +74,8 @@ void dipu_fallback(const c10::OperatorHandle& op, DispatchKeySet dispatch_keys, #define DIOPI_ATEN_FUNC_DISABLE_AUTOCOMPARE(opname, diopiFunc, wrapperFunc) \ do 
{ \ if ((reinterpret_cast(diopiFunc) != nullptr) && \ - (!dipu::opRegexMatch::whetherOpMatch( \ - opname, dipu::opRegexMatch::fallbackMatchers))) { \ + (!dipu::op_regex_match::isOpMatch( \ + opname, dipu::op_regex_match::fallbackMatchers))) { \ m.impl(opname, TORCH_FN(wrapperFunc)); \ } else { \ if ((reinterpret_cast(diopiFunc) == nullptr)) { \ @@ -98,9 +98,14 @@ void dipu_fallback(const c10::OperatorHandle& op, DispatchKeySet dispatch_keys, } \ if ((reinterpret_cast(diopi_func) != nullptr) && \ !((force_fallback) || \ - dipu::opRegexMatch::whetherOpMatch( \ - opname, dipu::opRegexMatch::fallbackMatchers))) { \ - m.impl(opname, TORCH_FN(wrapper_func)); \ + dipu::op_regex_match::isOpMatch( \ + opname, dipu::op_regex_match::fallbackMatchers))) { \ + if (dipu::op_regex_match::isOpMatch( \ + opname, dipu::op_regex_match::autocompareMatchers)) { \ + m.impl(opname, TORCH_FN(wrapper_func##_autocompare)); \ + } else { \ + m.impl(opname, TORCH_FN(wrapper_func)); \ + } \ } else { \ if ((reinterpret_cast(diopi_func) == nullptr)) { \ DIPU_OP_LOG_WARNING_ONCE(#diopi_func << " is not yet implemented, "); \ diff --git a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp index 4d6f40b8d..d538a8955 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp +++ b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp @@ -16,7 +16,7 @@ // ".specified_autocompare_op_list.config" namespace dipu { -namespace opRegexMatch { +namespace op_regex_match { std::vector loadMatcher(const char* env_name, const char* config_name) { auto append = [](std::istream& input, std::vector& output) { @@ -50,7 +50,7 @@ std::vector loadMatcher(const char* env_name, return list; } -bool whetherOpMatch(const char* opname, +bool isOpMatch(const char* opname, const std::vector& regexMatchers) { if (regexMatchers.empty() || opname == nullptr) { return false; @@ -64,13 +64,13 @@ bool whetherOpMatch(const char* opname, const char* const fallback_env_name = 
"DIPU_FORCE_FALLBACK_OPS_LIST"; const char* const fallback_config_name = ".dipu_force_fallback_op_list.config"; const std::vector fallbackMatchers = - dipu::opRegexMatch::loadMatcher(fallback_env_name, fallback_config_name); + dipu::op_regex_match::loadMatcher(fallback_env_name, fallback_config_name); const char* const specified_autocompare_env_name = "DIPU_AUTOCOMPARE_OPS_LIST"; const char* const specified_autocompare_config_name = ".specified_autocompare_op_list.config"; const std::vector autocompareMatchers = - dipu::opRegexMatch::loadMatcher(specified_autocompare_env_name, + dipu::op_regex_match::loadMatcher(specified_autocompare_env_name, specified_autocompare_config_name); -} // namespace opRegexMatch +} // namespace op_regex_match } // namespace dipu diff --git a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp index d84815604..f1f7bfa83 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp +++ b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp @@ -7,10 +7,10 @@ #include namespace dipu { -namespace opRegexMatch { +namespace op_regex_match { std::vector loadMatcher(const char* env_name, const char* config_name); -bool whetherOpMatch(const char* opname, +bool isOpMatch(const char* opname, const std::vector& regexMatchers); extern const char* const fallback_env_name; @@ -20,5 +20,5 @@ extern const std::vector fallbackMatchers; extern const char* const specified_autocompare_env_name; extern const char* const specified_autocompare_config_name; extern const std::vector autocompareMatchers; -} // namespace opRegexMatch +} // namespace op_regex_match } // namespace dipu From d5a38d0e5bd457f42fcb6c810c9de9976107e70f Mon Sep 17 00:00:00 2001 From: NeosZhang Date: Wed, 24 Apr 2024 14:50:22 +0800 Subject: [PATCH 22/29] add directMemCopyH2H --- .../diopi_functions.yaml | 2 -- .../csrc_dipu/aten/RegisterDIPU.hpp | 20 +++++------ .../csrc_dipu/aten/ops/DIPUCopy.hpp | 36 +++++++++++++++---- 
.../csrc_dipu/aten/ops/OpRegexMatch.cpp | 4 +-- 4 files changed, 41 insertions(+), 21 deletions(-) diff --git a/dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml b/dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml index 16d324dd0..cf162e07d 100755 --- a/dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml +++ b/dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml @@ -2748,7 +2748,6 @@ # this copy_ aten op may use both diopiCastDtype and diopiCopyInp. it's a proxy/composite op - schema: copy_(Tensor(a!) self, Tensor src, bool non_blocking=False) -> Tensor(a!) - autocompare: disable dummy_call_diopi: True custom_fallback: True device: [cuda, camb, ascend, droplet, supa, kunlunxin] @@ -2760,7 +2759,6 @@ # vendor who has no fully implemented diopi and proper fallback DIPUCopy sub-class - schema: copy_(Tensor(a!) self, Tensor src, bool non_blocking=False) -> Tensor(a!) - autocompare: disable custom_fallback: True dummy_call_diopi: True custom_code_at_the_beginning: | diff --git a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp index 431503672..a3fb2e010 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp +++ b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp @@ -52,10 +52,10 @@ void dipu_fallback(const c10::OperatorHandle& op, DispatchKeySet dispatch_keys, #define DIOPI_ATEN_FUNC(opname, diopiFunc, wrapperFunc) \ do { \ if ((reinterpret_cast(diopiFunc) != nullptr) && \ - (!dipu::op_regex_match::isOpMatch( \ - opname, dipu::op_regex_match::fallbackMatchers))) { \ - if (dipu::op_regex_match::isOpMatch( \ - opname, dipu::op_regex_match::autocompareMatchers)) { \ + (!dipu::op_regex_match::isOpMatch( \ + opname, dipu::op_regex_match::fallbackMatchers))) { \ + if (dipu::op_regex_match::isOpMatch( \ + opname, dipu::op_regex_match::autocompareMatchers)) { \ m.impl(opname, TORCH_FN(wrapperFunc##_autocompare)); \ } else { \ m.impl(opname, TORCH_FN(wrapperFunc)); \ @@ -74,8 +74,8 @@ void 
dipu_fallback(const c10::OperatorHandle& op, DispatchKeySet dispatch_keys, #define DIOPI_ATEN_FUNC_DISABLE_AUTOCOMPARE(opname, diopiFunc, wrapperFunc) \ do { \ if ((reinterpret_cast(diopiFunc) != nullptr) && \ - (!dipu::op_regex_match::isOpMatch( \ - opname, dipu::op_regex_match::fallbackMatchers))) { \ + (!dipu::op_regex_match::isOpMatch( \ + opname, dipu::op_regex_match::fallbackMatchers))) { \ m.impl(opname, TORCH_FN(wrapperFunc)); \ } else { \ if ((reinterpret_cast(diopiFunc) == nullptr)) { \ @@ -98,10 +98,10 @@ void dipu_fallback(const c10::OperatorHandle& op, DispatchKeySet dispatch_keys, } \ if ((reinterpret_cast(diopi_func) != nullptr) && \ !((force_fallback) || \ - dipu::op_regex_match::isOpMatch( \ - opname, dipu::op_regex_match::fallbackMatchers))) { \ - if (dipu::op_regex_match::isOpMatch( \ - opname, dipu::op_regex_match::autocompareMatchers)) { \ + dipu::op_regex_match::isOpMatch( \ + opname, dipu::op_regex_match::fallbackMatchers))) { \ + if (dipu::op_regex_match::isOpMatch( \ + opname, dipu::op_regex_match::autocompareMatchers)) { \ m.impl(opname, TORCH_FN(wrapper_func##_autocompare)); \ } else { \ m.impl(opname, TORCH_FN(wrapper_func)); \ diff --git a/dipu/torch_dipu/csrc_dipu/aten/ops/DIPUCopy.hpp b/dipu/torch_dipu/csrc_dipu/aten/ops/DIPUCopy.hpp index 41bde2531..c807fe065 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/ops/DIPUCopy.hpp +++ b/dipu/torch_dipu/csrc_dipu/aten/ops/DIPUCopy.hpp @@ -35,6 +35,8 @@ enum class DIPUCopyType { D2H, // from host to device H2D, + // from host to host + H2H, }; // Align with pytorch's behavior, see TensorIterator.cpp compute_mem_overlaps() @@ -59,16 +61,23 @@ inline void tryRecordStream(const at::Tensor& tensor, DIPUStream& curStream, inline DIPUCopyType getCopyType(const at::Tensor& dst, const at::Tensor& src) { bool isSrcDevice = dipu::isDeviceTensor(src); bool isDstDevice = dipu::isDeviceTensor(dst); - if (!isSrcDevice) { - return DIPUCopyType::H2D; // this op not handle h2h, dest always device + if 
(!isSrcDevice && isDstDevice) { + return DIPUCopyType::H2D; } - if (!isDstDevice) { - return DIPUCopyType::D2H; // here src always device + if (!isDstDevice && isSrcDevice) { + return DIPUCopyType::D2H; } - if (src.device().index() != dst.device().index()) { + if (isSrcDevice && isDstDevice && + src.device().index() != dst.device().index()) { return DIPUCopyType::D2OtherD; } - return DIPUCopyType::D2Self; + if (isSrcDevice && isDstDevice && + src.device().index() == dst.device().index()) { + return DIPUCopyType::D2Self; + } + if (!isSrcDevice && !isDstDevice) { + return DIPUCopyType::H2H; + } } inline int64_t getMemCopyBytes(const at::Tensor& dst, const at::Tensor& src, @@ -117,6 +126,17 @@ inline void doMemCopyD2H(const at::Tensor& dst, const at::Tensor& src, } } +inline void doMemCopyH2H(const at::Tensor& dst, const at::Tensor& src, + int64_t nbytes) { + if (!dst.is_contiguous() || !src.is_contiguous()) { + std::cerr << "Tensors must be contiguous for memory copy." << std::endl; + return; + } + void* src_ptr = src.data_ptr(); + void* dst_ptr = dst.data_ptr(); + memcpy(dst_ptr, src_ptr, nbytes); +} + inline void doMemCopyD2D(const at::Tensor& dst, const at::Tensor& src, dipu::DIPUStream& stream, int64_t nbytes, bool isSynchronousCopy) { @@ -148,6 +168,9 @@ inline void memCopy(const at::Tensor& dst, const at::Tensor& src, // dst is cpu. 
doMemCopyD2H(dst, src, stream, nbytes, isSynchronousCopy); break; + case DIPUCopyType::H2H: + doMemCopyH2H(dst, src, nbytes); + break; default: // device to device doMemCopyD2D(dst, src, stream, nbytes, isSynchronousCopy); } @@ -294,7 +317,6 @@ class DIPUCopyInplace : public DIPUCopyBase { if (native::dumpOpArgLevel() > 0) { printf("--%-50s %-30s \n", "[copy_]:", "doDirectMemCopy"); } - memCopy(dst, src, curStream, copyType, /*nonOverlappingAndDense=*/true, /*isSynchronousCopy=*/false); diff --git a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp index d538a8955..39b031d3f 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp +++ b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp @@ -51,7 +51,7 @@ std::vector loadMatcher(const char* env_name, } bool isOpMatch(const char* opname, - const std::vector& regexMatchers) { + const std::vector& regexMatchers) { if (regexMatchers.empty() || opname == nullptr) { return false; } @@ -71,6 +71,6 @@ const char* const specified_autocompare_config_name = ".specified_autocompare_op_list.config"; const std::vector autocompareMatchers = dipu::op_regex_match::loadMatcher(specified_autocompare_env_name, - specified_autocompare_config_name); + specified_autocompare_config_name); } // namespace op_regex_match } // namespace dipu From 16585dbea9687d8571a482d81c149b480514b907 Mon Sep 17 00:00:00 2001 From: NeosZhang Date: Wed, 24 Apr 2024 17:40:27 +0800 Subject: [PATCH 23/29] fix --- .../autogen_diopi_wrapper.py | 3 + .../csrc_dipu/aten/RegisterDIPU.hpp | 124 +++++++++--------- .../csrc_dipu/aten/ops/OpRegexMatch.cpp | 10 +- .../csrc_dipu/aten/ops/OpRegexMatch.hpp | 8 +- 4 files changed, 73 insertions(+), 72 deletions(-) diff --git a/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py b/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py index 604af956d..42bcca8e2 100644 --- a/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py +++ 
b/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py @@ -459,6 +459,9 @@ def create_call_aten_cpu_cpp_function_code_from_config(fun_config): opname = re.sub("\.correction", "", opname) opname = re.sub("\.input", "", opname) opname = re.sub("\.dim_IntList", "", opname) + opname = re.sub("\.dim", "", opname) + opname = re.sub("\.mode", "", opname) + opname = opname.replace(".", "_") opname = opname.split(".")[0] if opname[-1] == "_" and len(get_function_return_param_from_schema(schema)) > 0: diff --git a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp index a3fb2e010..8062fe9a4 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp +++ b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp @@ -49,73 +49,75 @@ void dipu_fallback(const c10::OperatorHandle& op, DispatchKeySet dispatch_keys, // It mat be necessary to determine whether to keep torchop default impl // for non-custom ops through function dipuKeepTorchopDefaultImpl firstly in the // future, and we use force fallback to keep torchop default impl now. 
-#define DIOPI_ATEN_FUNC(opname, diopiFunc, wrapperFunc) \ - do { \ - if ((reinterpret_cast(diopiFunc) != nullptr) && \ - (!dipu::op_regex_match::isOpMatch( \ - opname, dipu::op_regex_match::fallbackMatchers))) { \ - if (dipu::op_regex_match::isOpMatch( \ - opname, dipu::op_regex_match::autocompareMatchers)) { \ - m.impl(opname, TORCH_FN(wrapperFunc##_autocompare)); \ - } else { \ - m.impl(opname, TORCH_FN(wrapperFunc)); \ - } \ - } else { \ - if ((reinterpret_cast(diopiFunc) == nullptr)) { \ - DIPU_OP_LOG_WARNING_ONCE(#diopiFunc << " is not yet implemented, "); \ - } else { \ - DIPU_OP_LOG_WARNING_ONCE("force fallback has been set, "); \ - } \ - DIPU_OP_LOG_WARNING_ONCE((opname) << " will be fallback to cpu" \ - << "\n"); \ - } \ +#define DIOPI_ATEN_FUNC(opname, diopiFunc, wrapperFunc) \ + do { \ + if (reinterpret_cast(diopiFunc) == nullptr) { \ + DIPU_OP_LOG_WARNING_ONCE(#diopiFunc << " is not yet implemented, " \ + << (opname) \ + << " will be fallback to cpu" \ + << "\n"); \ + } else if (dipu::op_regex_match::isOpMatch( \ + opname, dipu::op_regex_match::fallbackMatchers)) { \ + DIPU_OP_LOG_WARNING_ONCE("force fallback has been set, " \ + << (opname) << " will be fallback to cpu" \ + << "\n"); \ + } else if (dipu::op_regex_match::isOpMatch( \ + opname, dipu::op_regex_match::autocompareMatchers)) { \ + m.impl(opname, TORCH_FN(wrapperFunc##_autocompare)); \ + } else { \ + m.impl(opname, TORCH_FN(wrapperFunc)); \ + } \ } while (false); -#define DIOPI_ATEN_FUNC_DISABLE_AUTOCOMPARE(opname, diopiFunc, wrapperFunc) \ - do { \ - if ((reinterpret_cast(diopiFunc) != nullptr) && \ - (!dipu::op_regex_match::isOpMatch( \ - opname, dipu::op_regex_match::fallbackMatchers))) { \ - m.impl(opname, TORCH_FN(wrapperFunc)); \ - } else { \ - if ((reinterpret_cast(diopiFunc) == nullptr)) { \ - DIPU_OP_LOG_WARNING_ONCE(#diopiFunc << " is not yet implemented, "); \ - } else { \ - DIPU_OP_LOG_WARNING_ONCE("force fallback has been set, "); \ - } \ - DIPU_OP_LOG_WARNING_ONCE((opname) << " 
will be fallback to cpu" \ - << "\n"); \ - } \ +#define DIOPI_ATEN_FUNC_DISABLE_AUTOCOMPARE(opname, diopiFunc, wrapperFunc) \ + do { \ + if (reinterpret_cast(diopiFunc) == nullptr) { \ + DIPU_OP_LOG_WARNING_ONCE(#diopiFunc << " is not yet implemented, " \ + << (opname) \ + << " will be fallback to cpu" \ + << "\n"); \ + } else if (dipu::op_regex_match::isOpMatch( \ + opname, dipu::op_regex_match::fallbackMatchers)) { \ + DIPU_OP_LOG_WARNING_ONCE("force fallback has been set, " \ + << (opname) << " will be fallback to cpu" \ + << "\n"); \ + } \ + \ + if ((!dipu::op_regex_match::isOpMatch( \ + opname, dipu::op_regex_match::fallbackMatchers))) { \ + m.impl(opname, TORCH_FN(wrapperFunc)); \ + } \ } while (false); // Determine whether to keep torchop default impl for custom ops through // function dipuKeepTorchopDefaultImpl firstly. -#define DIOPI_ATEN_FUNC_CUSTOM_FALLBACK(opname, diopi_func, force_fallback, \ - wrapper_func, custom_fallback_func) \ - do { \ - if (dipu::native::dipuKeepTorchopDefaultImpl(opname)) { \ - break; \ - } \ - if ((reinterpret_cast(diopi_func) != nullptr) && \ - !((force_fallback) || \ - dipu::op_regex_match::isOpMatch( \ - opname, dipu::op_regex_match::fallbackMatchers))) { \ - if (dipu::op_regex_match::isOpMatch( \ - opname, dipu::op_regex_match::autocompareMatchers)) { \ - m.impl(opname, TORCH_FN(wrapper_func##_autocompare)); \ - } else { \ - m.impl(opname, TORCH_FN(wrapper_func)); \ - } \ - } else { \ - if ((reinterpret_cast(diopi_func) == nullptr)) { \ - DIPU_OP_LOG_WARNING_ONCE(#diopi_func << " is not yet implemented, "); \ - } else { \ - DIPU_OP_LOG_WARNING_ONCE("force fallback has been set, "); \ - } \ - DIPU_OP_LOG_WARNING_ONCE((opname) << " will be fallback to cpu" \ - << "\n"); \ - m.impl(opname, TORCH_FN(custom_fallback_func)); \ - } \ +#define DIOPI_ATEN_FUNC_CUSTOM_FALLBACK(opname, diopi_func, force_fallback, \ + wrapper_func, custom_fallback_func) \ + do { \ + if (dipu::native::dipuKeepTorchopDefaultImpl(opname)) { \ + break; \ + 
} \ + if (reinterpret_cast(diopi_func) == nullptr) { \ + DIPU_OP_LOG_WARNING_ONCE(#diopi_func << " is not yet implemented, " \ + << (opname) \ + << " will be fallback to cpu" \ + << "\n"); \ + } \ + \ + else if (((force_fallback) || \ + dipu::op_regex_match::isOpMatch( \ + opname, dipu::op_regex_match::fallbackMatchers))) { \ + DIPU_OP_LOG_WARNING_ONCE("force fallback has been set, " \ + << (opname) << " will be fallback to cpu" \ + << "\n"); \ + } \ + else if (dipu::op_regex_match::isOpMatch( \ + opname, dipu::op_regex_match::autocompareMatchers)) { \ + m.impl(opname, TORCH_FN(wrapper_func##_autocompare)); \ + } \ + else { \ + m.impl(opname, TORCH_FN(wrapper_func)); \ + } \ } while (false); class DIPUOpRegister { diff --git a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp index 39b031d3f..1e0c02cef 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp +++ b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.cpp @@ -61,13 +61,15 @@ bool isOpMatch(const char* opname, [&opname](auto& matcher) { return std::regex_match(opname, matcher); }); } -const char* const fallback_env_name = "DIPU_FORCE_FALLBACK_OPS_LIST"; -const char* const fallback_config_name = ".dipu_force_fallback_op_list.config"; +constexpr const char* fallback_env_name = "DIPU_FORCE_FALLBACK_OPS_LIST"; +constexpr const char* fallback_config_name = + ".dipu_force_fallback_op_list.config"; const std::vector fallbackMatchers = dipu::op_regex_match::loadMatcher(fallback_env_name, fallback_config_name); -const char* const specified_autocompare_env_name = "DIPU_AUTOCOMPARE_OPS_LIST"; -const char* const specified_autocompare_config_name = +constexpr const char* specified_autocompare_env_name = + "DIPU_AUTOCOMPARE_OPS_LIST"; +constexpr const char* specified_autocompare_config_name = ".specified_autocompare_op_list.config"; const std::vector autocompareMatchers = dipu::op_regex_match::loadMatcher(specified_autocompare_env_name, diff --git 
a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp index f1f7bfa83..2d8dda86d 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp +++ b/dipu/torch_dipu/csrc_dipu/aten/ops/OpRegexMatch.hpp @@ -11,14 +11,8 @@ namespace op_regex_match { std::vector loadMatcher(const char* env_name, const char* config_name); bool isOpMatch(const char* opname, - const std::vector& regexMatchers); - -extern const char* const fallback_env_name; -extern const char* const fallback_config_name; + const std::vector& regexMatchers); extern const std::vector fallbackMatchers; - -extern const char* const specified_autocompare_env_name; -extern const char* const specified_autocompare_config_name; extern const std::vector autocompareMatchers; } // namespace op_regex_match } // namespace dipu From 0a5cd8993fecb18a17dcbf4093cfccd0a04c35b5 Mon Sep 17 00:00:00 2001 From: NeosZhang Date: Wed, 24 Apr 2024 17:43:14 +0800 Subject: [PATCH 24/29] fix lint --- dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py b/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py index 42bcca8e2..1fc62c4b1 100644 --- a/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py +++ b/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py @@ -461,7 +461,7 @@ def create_call_aten_cpu_cpp_function_code_from_config(fun_config): opname = re.sub("\.dim_IntList", "", opname) opname = re.sub("\.dim", "", opname) opname = re.sub("\.mode", "", opname) - + opname = opname.replace(".", "_") opname = opname.split(".")[0] if opname[-1] == "_" and len(get_function_return_param_from_schema(schema)) > 0: From 17ce56fd580d07a1de21abd8ddd18bab845d03fb Mon Sep 17 00:00:00 2001 From: NeosZhang Date: Wed, 24 Apr 2024 18:08:21 +0800 Subject: [PATCH 25/29] fix clang format --- dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp | 2 +- 
dipu/torch_dipu/csrc_dipu/aten/ops/DIPUCopy.hpp | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp index 8062fe9a4..e460f5300 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp +++ b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp @@ -103,7 +103,7 @@ void dipu_fallback(const c10::OperatorHandle& op, DispatchKeySet dispatch_keys, << " will be fallback to cpu" \ << "\n"); \ } \ - \ + \ else if (((force_fallback) || \ dipu::op_regex_match::isOpMatch( \ opname, dipu::op_regex_match::fallbackMatchers))) { \ diff --git a/dipu/torch_dipu/csrc_dipu/aten/ops/DIPUCopy.hpp b/dipu/torch_dipu/csrc_dipu/aten/ops/DIPUCopy.hpp index c807fe065..41819eadf 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/ops/DIPUCopy.hpp +++ b/dipu/torch_dipu/csrc_dipu/aten/ops/DIPUCopy.hpp @@ -75,9 +75,7 @@ inline DIPUCopyType getCopyType(const at::Tensor& dst, const at::Tensor& src) { src.device().index() == dst.device().index()) { return DIPUCopyType::D2Self; } - if (!isSrcDevice && !isDstDevice) { - return DIPUCopyType::H2H; - } + return DIPUCopyType::H2H; } inline int64_t getMemCopyBytes(const at::Tensor& dst, const at::Tensor& src, From 5413aecc2de86d334599cc5794afc2e9ef5fa849 Mon Sep 17 00:00:00 2001 From: NeosZhang Date: Wed, 24 Apr 2024 19:38:02 +0800 Subject: [PATCH 26/29] fix --- dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp index e460f5300..315fa23a1 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp +++ b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp @@ -102,8 +102,7 @@ void dipu_fallback(const c10::OperatorHandle& op, DispatchKeySet dispatch_keys, << (opname) \ << " will be fallback to cpu" \ << "\n"); \ - } \ - \ + } \ else if (((force_fallback) || \ dipu::op_regex_match::isOpMatch( \ 
opname, dipu::op_regex_match::fallbackMatchers))) { \ From 6931b62e708616ea00225b69d2f0c1a18d98df2b Mon Sep 17 00:00:00 2001 From: NeosZhang Date: Wed, 24 Apr 2024 19:56:17 +0800 Subject: [PATCH 27/29] fix clang format --- dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp index 315fa23a1..abfa8fd5a 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp +++ b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp @@ -102,7 +102,7 @@ void dipu_fallback(const c10::OperatorHandle& op, DispatchKeySet dispatch_keys, << (opname) \ << " will be fallback to cpu" \ << "\n"); \ - } \ + } \ else if (((force_fallback) || \ dipu::op_regex_match::isOpMatch( \ opname, dipu::op_regex_match::fallbackMatchers))) { \ From dedb0c5f82a32a9a620d75bb80c6090278d03777 Mon Sep 17 00:00:00 2001 From: NeosZhang Date: Thu, 25 Apr 2024 15:37:19 +0800 Subject: [PATCH 28/29] simplify register macro --- .../autogen_diopi_wrapper.py | 74 +++----------- .../diopi_functions.yaml | 44 ++++----- .../diopi_wrapper_template.py | 10 +- .../csrc_dipu/aten/RegisterDIPU.hpp | 96 ++++++------------- 4 files changed, 62 insertions(+), 162 deletions(-) diff --git a/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py b/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py index 1fc62c4b1..e536e4887 100644 --- a/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py +++ b/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py @@ -9,10 +9,8 @@ diopi_wrapper_file_template_content, diopi_wrapper_function_template_content, op_register_template_content, - op_register_disable_autocompare_template_content, custom_autograd_template_content, autocompare_template_content, - op_with_custom_fallback_register_template_content, ) @@ -677,14 +675,6 @@ def create_optional_generator_process_code(arg_name): op_register_template = 
CodeTemplate(op_register_template_content) -op_disable_autocompare_register_template = CodeTemplate( - op_register_disable_autocompare_template_content -) - -op_with_custom_fallback_register_template = CodeTemplate( - op_with_custom_fallback_register_template_content -) - custom_autograd_template = CodeTemplate(custom_autograd_template_content) autocompare_template = CodeTemplate(autocompare_template_content) @@ -914,7 +904,7 @@ def functions_code_gen(fun_config): fbody += custom_autograd_function_code fun_name = wrapper_fun_name - if fun_config.get("autocompare") not in ["disable"] and fun_config.get( + if fun_config.get("autocompare") not in [False] and fun_config.get( "register_op", True ) in [True, "True"]: auto_compare_fun_name = fun_name + "_autocompare" @@ -949,58 +939,16 @@ def functions_code_gen(fun_config): ) fbody += autocompare_code - # generate the OP_register code - # case 1: custom_fallback=False and autocompare not disabled - register_body = "" - if fun_config.get("custom_fallback", False) in ["False", False] and fun_config.get( - "autocompare", True - ) in ["True", True]: - register_body = op_register_template.substitute( - register_name=[get_op_name_from_schema(fun_config["schema"])], - aten_fun_name=["dipu::native::" + fun_name], - diopi_fun_name=[ - get_fun_name_from_cppsignature(diopi_interface).replace( - "diopi", "::diopi" - ) - ], - ) - - # case2: custom_fallback=False and autocompare=disable - elif fun_config.get("custom_fallback", False) in [ - "False", - False, - ] and fun_config.get("autocompare") in ["disable"]: - register_body = op_disable_autocompare_register_template.substitute( - register_name=[get_op_name_from_schema(fun_config["schema"])], - aten_fun_name=["dipu::native::" + fun_name], - diopi_fun_name=[ - get_fun_name_from_cppsignature(diopi_interface).replace( - "diopi", "::diopi" - ) - ], - ) - # case3: custom_fallback=True and autocompare not disable - elif fun_config.get("custom_fallback", False) in ["True", True] and 
fun_config.get( - "autocompare", True - ) in ["True", True]: - register_body = op_with_custom_fallback_register_template.substitute( - register_name=[get_op_name_from_schema(fun_config["schema"])], - aten_fun_name=["dipu::native::" + fun_name], - diopi_fun_name=[ - get_fun_name_from_cppsignature(diopi_interface).replace( - "diopi", "::diopi" - ) - ], - force_fallback=[ - ( - "false" - if fun_config.get("force_fallback", False) in [False, "False"] - else "true" - ) - ], - fallbackFunc=["dipu::native::" + "custom_fallback_" + fun_name], - ) - + # generate the op_register code + register_body = op_register_template.substitute( + register_name=[get_op_name_from_schema(fun_config["schema"])], + aten_fun_name=["dipu::native::" + fun_name], + diopi_fun_name=[ + get_fun_name_from_cppsignature(diopi_interface).replace("diopi", "::diopi") + ], + custom_fallback_config=str(fun_config.get("custom_fallback", False)).lower(), + autocompare_config=str(fun_config.get("autocompare", True)).lower(), + ) return fbody, register_body diff --git a/dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml b/dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml index cf162e07d..b0c434caf 100755 --- a/dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml +++ b/dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml @@ -1,5 +1,5 @@ - schema: "exampleop.overloadname(Tensor self, Scalar other, Scalar alpha=1, *, Tensor(a!) out) -> Tensor(a!)" - autocompare: disable + autocompare: False # op gen only on these torch version. use it only if op has different signature on different torch. # if it's only different on implementation , please use compile macro DIPU_TORCHXXX. # torch version number, 5 in total: {X-major}{XX-minor}{XX-patch} @@ -309,7 +309,7 @@ interface: diopiLayerNormBackward(ctx, grad_input, grad_weight, grad_bias, grad_out, input, weight, bias, mean, rstd, normalized_shape); - schema: "adaptive_avg_pool2d.out(Tensor self, SymInt[2] output_size, *, Tensor(a!) 
out) -> Tensor(a!)" - #autocompare: disable # TODO: cpu impl not support half now + #autocompare: False # TODO: cpu impl not support half now interface: diopiAdaptiveAvgPool2d(ctx, out, self, output_size) - schema: "_adaptive_avg_pool2d(Tensor self, SymInt[2] output_size) -> Tensor" @@ -486,13 +486,13 @@ interface: diopiRelu(ctx, out, self) - schema: "randperm.out(int n, *, Tensor(a!) out) -> Tensor(a!)" - autocompare: disable + autocompare: False custom_code_at_the_beginning: | diopiGeneratorHandle_t generatorDiopiGenerator = toDiopiGeneratorHandle(getDefaultDIPUGenerator()); interface: diopiRandperm(ctx, out, n, generatorDiopiGenerator) - schema: "randperm.generator_out(int n, *, Generator? generator, Tensor(a!) out) -> Tensor(a!)" - autocompare: disable + autocompare: False interface: diopiRandperm(ctx, out, n, generator) - schema: "aten::sum.dim_IntList(Tensor self, int[1]? dim, bool keepdim=False, * ScalarType? dtype=None) -> Tensor" @@ -686,7 +686,7 @@ interface: diopiMul(ctx, out, out, out) - schema: "bernoulli_.float(Tensor(a!) self, float p=0.5, *, Generator? generator=None) -> Tensor(a!)" - autocompare: disable + autocompare: False interface: diopiBernoulliScalar(ctx, self, p, generatorDiopiGenerator); - schema: "log.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)" @@ -1095,7 +1095,7 @@ interface: diopiRsqrt(ctx, out, self) - schema: "uniform_(Tensor(a!) self, float from=0, float to=1, *, Generator? generator=None) -> Tensor(a!)" - autocompare: disable + autocompare: False interface: diopiUniformInp(ctx, self, from, to, generator) - schema: "tril(Tensor self, int diagonal=0) -> Tensor" @@ -1177,15 +1177,15 @@ interface: diopiClamp(ctx, out, self, min, max) - schema: "random_(Tensor(a!) self, *, Generator? generator=None) -> Tensor(a!)" - autocompare: disable + autocompare: False interface: diopiRandomInp(ctx, self, 0, nullptr, generator) - schema: "random_.to(Tensor(a!) self, int to, *, Generator? 
generator=None) -> Tensor(a!)" - autocompare: disable + autocompare: False interface: diopiRandomInp(ctx, self, 0, &to, generator) - schema: "random_.from(Tensor(a!) self, int from, int? to, *, Generator? generator=None) -> Tensor(a!)" - autocompare: disable + autocompare: False interface: "diopiRandomInp(ctx, self, from, to.has_value() ? &to.value() : nullptr, generator)" - schema: "nonzero(Tensor self) -> Tensor" @@ -1239,7 +1239,7 @@ interface: diopiProd(ctx, out, self_dtype_diopi, &dim) - schema: repeat(Tensor self, SymInt[] repeats) -> Tensor - autocompare: disable + autocompare: False custom_code_at_the_beginning: | std::vector output_size(repeats.size()); for (int i = 0;i< repeats.size();++i) { @@ -1655,41 +1655,41 @@ interface: diopiReciprocal(ctx, out, self) - schema: "normal_(Tensor(a!) self, float mean=0, float std=1, *, Generator? generator=None) -> Tensor(a!)" - autocompare: disable + autocompare: False interface: diopiNormalInp(ctx, self, mean, std, generator) - schema: normal.Tensor_float_out(Tensor mean, float std=1, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!) - autocompare: disable + autocompare: False interface: diopiNormalTensorScalar(ctx, out, mean, std, generator) - schema: normal.Tensor_float(Tensor mean, float std=1, *, Generator? generator=None) -> Tensor - autocompare: disable + autocompare: False custom_code_at_the_beginning: | auto out = nodispatch::empty_like(mean); interface: diopiNormalTensorScalar(ctx, out, mean, std, generator) - schema: normal.float_Tensor_out(float mean, Tensor std, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!) - autocompare: disable + autocompare: False interface: diopiNormalScalarTensor(ctx, out, mean, std, generator) - schema: normal.float_Tensor(float mean, Tensor std, *, Generator? 
generator=None) -> Tensor - autocompare: disable + autocompare: False custom_code_at_the_beginning: | auto out = nodispatch::empty_like(std); interface: diopiNormalScalarTensor(ctx, out, mean, std, generator) - schema: normal.Tensor_Tensor_out(Tensor mean, Tensor std, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!) - autocompare: disable + autocompare: False interface: diopiNormalTensor(ctx, out, mean, std, generator) - schema: normal.Tensor_Tensor(Tensor mean, Tensor std, *, Generator? generator=None) -> Tensor - autocompare: disable + autocompare: False custom_code_at_the_beginning: | auto out = nodispatch::empty_like(mean); interface: diopiNormalTensor(ctx, out, mean, std, generator) - schema: normal.float_float_out(float mean, float std, SymInt[] size, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!) - autocompare: disable + autocompare: False interface: diopiNormal(ctx, out, mean, std, generator) - schema: "mm(Tensor self, Tensor mat2) -> Tensor" @@ -1829,7 +1829,7 @@ - schema: "ctc_loss_tensor_backward(Tensor grad_output, Tensor log_probs, Tensor targets, Tensor input_lengths, Tensor target_lengths, Tensor neg_log_likelihood, Tensor log_alpha, int blank, int reduction=Mean, bool zero_infinity=False) -> Tensor grad_input" device: [camb] - autocompare: disable + autocompare: False register_op: False custom_code_at_the_beginning: | const auto reductionDiopi = static_cast<::diopiReduction_t>(reduction); @@ -1925,7 +1925,7 @@ - schema: "ctc_loss_intlist_backward(Tensor grad_output, Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, Tensor neg_log_likelihood, Tensor log_alpha, int blank, int reduction=Mean, bool zero_infinity=False) -> Tensor grad_input" device: [camb] - autocompare: disable + autocompare: False register_op: False ins: [input_lengths_tensor, target_lengths_tensor] custom_code_at_the_beginning: | @@ -2767,7 +2767,7 @@ interface: diopiCopyInp(ctx, src, self) - schema: 
_amp_foreach_non_finite_check_and_unscale_(at::TensorList self, Tensor(b!) found_inf, Tensor inv_scale) -> void - autocompare: disable + autocompare: False custom_fallback: True custom_code_at_the_beginning: | std::vector diopiTensorHandles(self.size(), nullptr); @@ -2779,7 +2779,7 @@ // NOLINTEND(cppcoreguidelines-pro-type-const-cast) interface: diopiAmpForeachNonFiniteCheckAndUnscaleInp(ctx, diopiTensorHandles.data(), static_cast(self.size()), found_inf, inv_scale) # TODO(someone): fix this issue when `autocompare` is on - autocompare: disable + autocompare: False - schema: _amp_update_scale_(Tensor(a!) self, Tensor(b!) growth_tracker, Tensor found_inf, float scale_growth_factor, float scale_backoff_factor, int growth_interval) -> Tensor(a!) custom_fallback: True diff --git a/dipu/scripts/autogen_diopi_wrapper/diopi_wrapper_template.py b/dipu/scripts/autogen_diopi_wrapper/diopi_wrapper_template.py index 070c851f0..6ca764a05 100644 --- a/dipu/scripts/autogen_diopi_wrapper/diopi_wrapper_template.py +++ b/dipu/scripts/autogen_diopi_wrapper/diopi_wrapper_template.py @@ -129,15 +129,7 @@ """ op_register_template_content = """ -DIOPI_ATEN_FUNC("$register_name", $diopi_fun_name, $aten_fun_name); -""" - -op_register_disable_autocompare_template_content = """ -DIOPI_ATEN_FUNC_DISABLE_AUTOCOMPARE("$register_name", $diopi_fun_name, $aten_fun_name); -""" - -op_with_custom_fallback_register_template_content = """ -DIOPI_ATEN_FUNC_CUSTOM_FALLBACK("$register_name", $diopi_fun_name, $force_fallback /*whether force fallback*/, $aten_fun_name, $fallbackFunc); +DIOPI_ATEN_FUNC("$register_name", $diopi_fun_name, $aten_fun_name, $custom_fallback_config, $autocompare_config); """ custom_autograd_template_content = """ diff --git a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp index abfa8fd5a..d3d7e6b24 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp +++ b/dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.hpp @@ -49,74 +49,34 
@@ void dipu_fallback(const c10::OperatorHandle& op, DispatchKeySet dispatch_keys, // It mat be necessary to determine whether to keep torchop default impl // for non-custom ops through function dipuKeepTorchopDefaultImpl firstly in the // future, and we use force fallback to keep torchop default impl now. -#define DIOPI_ATEN_FUNC(opname, diopiFunc, wrapperFunc) \ - do { \ - if (reinterpret_cast(diopiFunc) == nullptr) { \ - DIPU_OP_LOG_WARNING_ONCE(#diopiFunc << " is not yet implemented, " \ - << (opname) \ - << " will be fallback to cpu" \ - << "\n"); \ - } else if (dipu::op_regex_match::isOpMatch( \ - opname, dipu::op_regex_match::fallbackMatchers)) { \ - DIPU_OP_LOG_WARNING_ONCE("force fallback has been set, " \ - << (opname) << " will be fallback to cpu" \ - << "\n"); \ - } else if (dipu::op_regex_match::isOpMatch( \ - opname, dipu::op_regex_match::autocompareMatchers)) { \ - m.impl(opname, TORCH_FN(wrapperFunc##_autocompare)); \ - } else { \ - m.impl(opname, TORCH_FN(wrapperFunc)); \ - } \ - } while (false); - -#define DIOPI_ATEN_FUNC_DISABLE_AUTOCOMPARE(opname, diopiFunc, wrapperFunc) \ - do { \ - if (reinterpret_cast(diopiFunc) == nullptr) { \ - DIPU_OP_LOG_WARNING_ONCE(#diopiFunc << " is not yet implemented, " \ - << (opname) \ - << " will be fallback to cpu" \ - << "\n"); \ - } else if (dipu::op_regex_match::isOpMatch( \ - opname, dipu::op_regex_match::fallbackMatchers)) { \ - DIPU_OP_LOG_WARNING_ONCE("force fallback has been set, " \ - << (opname) << " will be fallback to cpu" \ - << "\n"); \ - } \ - \ - if ((!dipu::op_regex_match::isOpMatch( \ - opname, dipu::op_regex_match::fallbackMatchers))) { \ - m.impl(opname, TORCH_FN(wrapperFunc)); \ - } \ - } while (false); - -// Determine whether to keep torchop default impl for custom ops through -// function dipuKeepTorchopDefaultImpl firstly. 
-#define DIOPI_ATEN_FUNC_CUSTOM_FALLBACK(opname, diopi_func, force_fallback, \ - wrapper_func, custom_fallback_func) \ - do { \ - if (dipu::native::dipuKeepTorchopDefaultImpl(opname)) { \ - break; \ - } \ - if (reinterpret_cast(diopi_func) == nullptr) { \ - DIPU_OP_LOG_WARNING_ONCE(#diopi_func << " is not yet implemented, " \ - << (opname) \ - << " will be fallback to cpu" \ - << "\n"); \ - } \ - else if (((force_fallback) || \ - dipu::op_regex_match::isOpMatch( \ - opname, dipu::op_regex_match::fallbackMatchers))) { \ - DIPU_OP_LOG_WARNING_ONCE("force fallback has been set, " \ - << (opname) << " will be fallback to cpu" \ - << "\n"); \ - } \ - else if (dipu::op_regex_match::isOpMatch( \ - opname, dipu::op_regex_match::autocompareMatchers)) { \ - m.impl(opname, TORCH_FN(wrapper_func##_autocompare)); \ - } \ - else { \ - m.impl(opname, TORCH_FN(wrapper_func)); \ - } \ +#define CONCAT_NAME(a, b) a##b +#define DIOPI_ATEN_FUNC(opname, diopiFunc, wrapperFunc, customFallbackConfig, \ + autocompareConfig) \ + do { \ + bool isAutoCompareMatch = dipu::op_regex_match::isOpMatch( \ + opname, dipu::op_regex_match::autocompareMatchers); \ + bool isFallbackMatch = dipu::op_regex_match::isOpMatch( \ + opname, dipu::op_regex_match::fallbackMatchers); \ + if (reinterpret_cast(diopiFunc) == nullptr) { \ + DIPU_OP_LOG_WARNING_ONCE(#diopiFunc << " is not yet implemented, " \ + << (opname) \ + << " will be fallback to cpu" \ + << "\n"); \ + break; \ + } \ + if ((autocompareConfig) && isAutoCompareMatch && \ + reinterpret_cast(wrapperFunc##_autocompare) != nullptr) { \ + m.impl(opname, TORCH_FN(wrapperFunc##_autocompare)); \ + break; \ + } \ + if ((customFallbackConfig) || isFallbackMatch) { \ + DIPU_OP_LOG_WARNING_ONCE("force fallback has been set, " \ + << (opname) << " will be fallback to cpu" \ + << "\n"); \ + break; \ + } \ + m.impl(opname, TORCH_FN(wrapperFunc)); \ + \ } while (false); class DIPUOpRegister { From 94b98f39c042474176d3fa81ee65e1cd11d0ef34 Mon Sep 17 00:00:00 2001 
From: NeosZhang Date: Fri, 26 Apr 2024 17:03:31 +0800 Subject: [PATCH 29/29] generate fake autocompare_func when disable --- .../autogen_diopi_wrapper.py | 79 +++++++++++-------- .../diopi_functions.yaml | 5 +- 2 files changed, 48 insertions(+), 36 deletions(-) diff --git a/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py b/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py index e536e4887..e973b6669 100644 --- a/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py +++ b/dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py @@ -904,39 +904,54 @@ def functions_code_gen(fun_config): fbody += custom_autograd_function_code fun_name = wrapper_fun_name - if fun_config.get("autocompare") not in [False] and fun_config.get( - "register_op", True - ) in [True, "True"]: + if fun_config.get("register_op", True) in [True, "True"]: auto_compare_fun_name = fun_name + "_autocompare" - autocompare_code = autocompare_template.substitute( - cppsignautre=[ - create_cpp_signature_from_schema(fun_config["schema"]).replace( - raw_fun_name, auto_compare_fun_name - ) - ], - transform_input_to_cpu_code=[ - create_transform_input_to_cpu_code(fun_config) - ], - execute_op_on_cpu_code=[ - create_call_aten_cpu_cpp_function_code_from_config(fun_config) - ], - comment=[fun_config["schema"]], - execute_op_on_device_code=[ - create_call_dipu_cpp_function_code_from_schema( - fun_config["schema"] - ).replace(raw_fun_name, fun_name) - ], - transform_result_to_cpu_code=[], - result_compare_code=[ - create_result_compare_code(fun_config) - + ( - "\nreturn result_device;\n" - if len(get_function_return_param_from_schema(fun_config["schema"])) - > 0 - else "" - ) - ], - ) + autocompare_code = "" + if fun_config.get("autocompare", True) not in [False]: + autocompare_code = autocompare_template.substitute( + cppsignautre=[ + create_cpp_signature_from_schema(fun_config["schema"]).replace( + raw_fun_name, auto_compare_fun_name + ) + ], + transform_input_to_cpu_code=[ + 
create_transform_input_to_cpu_code(fun_config) + ], + execute_op_on_cpu_code=[ + create_call_aten_cpu_cpp_function_code_from_config(fun_config) + ], + comment=[fun_config["schema"]], + execute_op_on_device_code=[ + create_call_dipu_cpp_function_code_from_schema( + fun_config["schema"] + ).replace(raw_fun_name, fun_name) + ], + transform_result_to_cpu_code=[], + result_compare_code=[ + create_result_compare_code(fun_config) + + ( + "\nreturn result_device;\n" + if len( + get_function_return_param_from_schema(fun_config["schema"]) + ) + > 0 + else "" + ) + ], + ) + if fun_config.get("autocompare", True) in [False]: + disable_autocompare_comment = ( + "// since autocompare is disabled, " + + auto_compare_fun_name + + " will do nothing.\n" + ) + autocompare_code = ( + disable_autocompare_comment + + "void " + + auto_compare_fun_name + + "() " + + "{}\n" + ) fbody += autocompare_code # generate the op_register code diff --git a/dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml b/dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml index b0c434caf..e1b81f832 100755 --- a/dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml +++ b/dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml @@ -2766,8 +2766,7 @@ device: [topsrider] interface: diopiCopyInp(ctx, src, self) -- schema: _amp_foreach_non_finite_check_and_unscale_(at::TensorList self, Tensor(b!) found_inf, Tensor inv_scale) -> void - autocompare: False +- schema: _amp_foreach_non_finite_check_and_unscale_(at::TensorList self, Tensor(b!) found_inf, Tensor inv_scale) -> () custom_fallback: True custom_code_at_the_beginning: | std::vector diopiTensorHandles(self.size(), nullptr); @@ -2778,8 +2777,6 @@ }); // NOLINTEND(cppcoreguidelines-pro-type-const-cast) interface: diopiAmpForeachNonFiniteCheckAndUnscaleInp(ctx, diopiTensorHandles.data(), static_cast(self.size()), found_inf, inv_scale) - # TODO(someone): fix this issue when `autocompare` is on - autocompare: False - schema: _amp_update_scale_(Tensor(a!) 
self, Tensor(b!) growth_tracker, Tensor found_inf, float scale_growth_factor, float scale_backoff_factor, int growth_interval) -> Tensor(a!) custom_fallback: True