From 5756169a54cf2137b3d4440dfaa2778542f75b99 Mon Sep 17 00:00:00 2001 From: Dheeraj Peri Date: Tue, 30 Aug 2022 18:13:25 -0700 Subject: [PATCH 01/12] refactor: Refactor testing to use cosine similarity, remove redundancy models and restructuring Signed-off-by: Dheeraj Peri --- .../lowering/test_module_fallback_passes.cpp | 2 +- tests/core/partitioning/BUILD | 16 --- .../test_fallback_graph_output.cpp | 69 ---------- tests/cpp/BUILD | 47 ------- tests/cpp/test_compiled_modules.cpp | 65 --------- tests/cpp/test_module_fallback.cpp | 74 ----------- tests/cpp/test_modules_as_engines.cpp | 47 +------ tests/cpp/test_multi_gpu_serde.cpp | 6 +- .../cpp/test_multiple_registered_engines.cpp | 68 ---------- tests/py/api/custom_models.py | 27 ++++ tests/py/api/test_e2e_behavior.py | 97 -------------- tests/py/api/test_embed_engines.py | 60 +++++++++ tests/py/api/test_models.py | 124 ++++++++++++++++++ tests/py/api/test_module_fallback.py | 53 ++++++++ .../api/test_multiple_registered_engines.py | 40 ++++++ tests/py/api/test_operator_fallback.py | 52 ++++++++ tests/py/api/test_ts_backend.py | 83 ++---------- tests/py/api/utils.py | 9 ++ tests/util/util.cpp | 13 ++ tests/util/util.h | 2 + 20 files changed, 402 insertions(+), 552 deletions(-) delete mode 100644 tests/core/partitioning/test_fallback_graph_output.cpp delete mode 100644 tests/cpp/test_compiled_modules.cpp delete mode 100644 tests/cpp/test_module_fallback.cpp delete mode 100644 tests/cpp/test_multiple_registered_engines.cpp create mode 100644 tests/py/api/custom_models.py create mode 100644 tests/py/api/test_embed_engines.py create mode 100644 tests/py/api/test_models.py create mode 100644 tests/py/api/test_module_fallback.py create mode 100644 tests/py/api/test_multiple_registered_engines.py create mode 100644 tests/py/api/test_operator_fallback.py create mode 100644 tests/py/api/utils.py diff --git a/tests/core/lowering/test_module_fallback_passes.cpp b/tests/core/lowering/test_module_fallback_passes.cpp index f11882df8b..e6eb098079 100644 --- a/tests/core/lowering/test_module_fallback_passes.cpp +++ b/tests/core/lowering/test_module_fallback_passes.cpp @@ -124,5 +124,5 @@ TEST(Lowering, LowerAndPartitionSimpleModuleFallbackCorrectly) { } auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor(); - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results, trt_results, 2e-6)); + ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results, 0.99)); } diff --git a/tests/core/partitioning/BUILD b/tests/core/partitioning/BUILD index 83722b4271..5f90be2972 100644 --- a/tests/core/partitioning/BUILD +++ b/tests/core/partitioning/BUILD @@ -55,21 +55,6 @@ cc_test( }), ) -cc_test( - name = "test_fallback_graph_output", - srcs = ["test_fallback_graph_output.cpp"], - data = [ - ":jit_models", - ], - deps = [ - "//tests/util", - "@googletest//:gtest_main", - ] + select({ - ":use_pre_cxx11_abi": ["@libtorch_pre_cxx11_abi//:libtorch"], - "//conditions:default": ["@libtorch//:libtorch"], - }), -) - cc_test( name = "test_loop_fallback", srcs = ["test_loop_fallback.cpp"], @@ -104,7 +89,6 @@ test_suite( name = "partitioning_tests", tests = [ ":test_conditionals", - ":test_fallback_graph_output", ":test_loading_model", ":test_loop_fallback", ":test_resolve_nontensor_inputs", diff --git a/tests/core/partitioning/test_fallback_graph_output.cpp b/tests/core/partitioning/test_fallback_graph_output.cpp deleted file mode 100644 index 98fc4e6128..0000000000 --- a/tests/core/partitioning/test_fallback_graph_output.cpp +++ /dev/null @@ -1,69 +0,0 @@ -#include -#include -#include "core/compiler.h" -#include "gtest/gtest.h" -#include "tests/util/util.h" -#include "torch/script.h" - -#ifndef DISABLE_TEST_IN_CI - -TEST(Partitioning, ComputeResNet50FallbackGraphCorrectly) { - torch::jit::script::Module mod; - try { - mod = torch::jit::load("tests/modules/resnet50_traced.jit.pt"); - } catch (const c10::Error& e) { - std::cerr << "error loading the model\n"; - return; - } - - const std::vector> input_shapes = {{1, 3, 224, 224}}; - std::vector jit_inputs_ivalues; - std::vector trt_inputs_ivalues; - for (auto in_shape : input_shapes) { - auto in = at::randint(5, in_shape, {at::kCUDA}); - jit_inputs_ivalues.push_back(in.clone()); - trt_inputs_ivalues.push_back(in.clone()); - } - - std::vector input_ranges{torch_tensorrt::core::ir::Input({1, 3, 224, 224})}; - - torch_tensorrt::core::CompileSpec cfg(input_ranges); - cfg.partition_info.enabled = true; - cfg.partition_info.forced_fallback_operators.push_back("aten::add"); - - auto jit_results = mod.forward(jit_inputs_ivalues).toTensor(); - auto trt_mod = torch_tensorrt::core::CompileGraph(mod, cfg); - auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor(); - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results, trt_results, 2e-6)); -} - -TEST(Partitioning, ComputeMobileNetFallbackGraphCorrectly) { - torch::jit::script::Module mod; - try { - mod = torch::jit::load("tests/modules/mobilenet_v2_traced.jit.pt"); - } catch (const c10::Error& e) { - std::cerr << "error loading the model\n"; - return; - } - - const std::vector> input_shapes = {{1, 3, 224, 224}}; - std::vector jit_inputs_ivalues; - std::vector trt_inputs_ivalues; - for (auto in_shape : input_shapes) { - auto in = at::randint(5, in_shape, {at::kCUDA}); - jit_inputs_ivalues.push_back(in.clone()); - trt_inputs_ivalues.push_back(in.clone()); - } - - std::vector input_ranges{torch_tensorrt::core::ir::Input({1, 3, 224, 224})}; - auto g = mod.get_method("forward").graph(); - torch_tensorrt::core::CompileSpec cfg(input_ranges); - cfg.partition_info.enabled = true; - cfg.partition_info.forced_fallback_operators.push_back("aten::hardtanh"); - - auto jit_results = mod.forward(jit_inputs_ivalues).toTensor(); - auto trt_mod = torch_tensorrt::core::CompileGraph(mod, cfg); - auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor(); - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results, trt_results, 2e-6)); -} -#endif diff --git a/tests/cpp/BUILD b/tests/cpp/BUILD index 3d56682189..ea2c6ae752 100644 --- a/tests/cpp/BUILD +++ b/tests/cpp/BUILD @@ -13,12 +13,9 @@ test_suite( name = "api_tests", tests = [ ":test_collections", - ":test_compiled_modules", ":test_default_input_types", ":test_example_tensors", - ":test_module_fallback", ":test_modules_as_engines", - ":test_multiple_registered_engines", ":test_runtime_thread_safety", ":test_serialization", ], @@ -28,12 +25,9 @@ test_suite( name = "aarch64_api_tests", tests = [ ":test_collections", - ":test_compiled_modules", ":test_default_input_types", ":test_example_tensors", - ":test_module_fallback", ":test_modules_as_engines", - ":test_multiple_registered_engines", ":test_runtime_thread_safety", ":test_serialization", ], @@ -72,21 +66,6 @@ cc_test( ], ) -cc_test( - name = "test_multiple_registered_engines", - srcs = ["test_multiple_registered_engines.cpp"], - data = [ - "//tests/modules:jit_models", - ], - deps = [ - "//tests/util", - "@googletest//:gtest_main", - ] + select({ - ":use_pre_cxx11_abi": ["@libtorch_pre_cxx11_abi//:libtorch"], - "//conditions:default": ["@libtorch//:libtorch"], - }), -) - cc_test( name = "test_modules_as_engines", timeout = "long", @@ -110,21 +89,6 @@ cc_test( ], ) -cc_test( - name = "test_module_fallback", - srcs = ["test_module_fallback.cpp"], - data = [ - "//tests/modules:jit_models", - ], - deps = [ - "//tests/util", - "@googletest//:gtest_main", - ] + select({ - ":use_pre_cxx11_abi": ["@libtorch_pre_cxx11_abi//:libtorch"], - "//conditions:default": ["@libtorch//:libtorch"], - }), -) - cc_test( name = "test_collections", srcs = ["test_collections.cpp"], @@ -140,17 +104,6 @@ cc_test( }), ) -cc_test( - name = "test_compiled_modules", - srcs = ["test_compiled_modules.cpp"], - data = [ - "//tests/modules:jit_models", - ], - deps = [ - ":cpp_api_test", - ], -) - cc_test( name = "test_multi_gpu_serde", srcs = ["test_multi_gpu_serde.cpp"], diff --git a/tests/cpp/test_compiled_modules.cpp b/tests/cpp/test_compiled_modules.cpp deleted file mode 100644 index 595dd7044f..0000000000 --- a/tests/cpp/test_compiled_modules.cpp +++ /dev/null @@ -1,65 +0,0 @@ -#include "cpp_api_test.h" - -TEST_P(CppAPITests, CompiledModuleIsClose) { - std::vector jit_inputs_ivalues; - std::vector trt_inputs_ivalues; - std::vector shapes; - for (uint64_t i = 0; i < input_shapes.size(); i++) { - auto in = at::randint(5, input_shapes[i], {at::kCUDA}).to(input_types[i]); - jit_inputs_ivalues.push_back(in.clone()); - trt_inputs_ivalues.push_back(in.clone()); - auto in_spec = torch_tensorrt::Input(input_shapes[i]); - in_spec.dtype = input_types[i]; - shapes.push_back(in_spec); - std::cout << in_spec << std::endl; - } - - torch::jit::IValue jit_results_ivalues = torch_tensorrt::tests::util::RunModuleForward(mod, jit_inputs_ivalues); - std::vector jit_results; - if (jit_results_ivalues.isTuple()) { - auto tuple = jit_results_ivalues.toTuple(); - for (auto t : tuple->elements()) { - jit_results.push_back(t.toTensor()); - } - } else { - jit_results.push_back(jit_results_ivalues.toTensor()); - } - - auto spec = torch_tensorrt::ts::CompileSpec(shapes); - spec.truncate_long_and_double = true; - - auto trt_mod = torch_tensorrt::ts::compile(mod, spec); - torch::jit::IValue trt_results_ivalues = torch_tensorrt::tests::util::RunModuleForward(trt_mod, trt_inputs_ivalues); - std::vector trt_results; - if (trt_results_ivalues.isTuple()) { - auto tuple = trt_results_ivalues.toTuple(); - for (auto t : tuple->elements()) { - trt_results.push_back(t.toTensor()); - } - } else { - trt_results.push_back(trt_results_ivalues.toTensor()); - } - - for (size_t i = 0; i < trt_results.size(); i++) { - ASSERT_TRUE( - torch_tensorrt::tests::util::almostEqual(jit_results[i], trt_results[i].reshape_as(jit_results[i]), threshold)); - } -} - -#ifndef DISABLE_TEST_IN_CI - -INSTANTIATE_TEST_SUITE_P( - CompiledModuleForwardIsCloseSuite, - CppAPITests, - testing::Values( - PathAndInput({"tests/modules/resnet18_traced.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 2e-5}), - PathAndInput({"tests/modules/resnet50_traced.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 2e-5}), - PathAndInput({"tests/modules/mobilenet_v2_traced.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 2e-5}), - PathAndInput({"tests/modules/resnet18_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 2e-5}), - PathAndInput({"tests/modules/resnet50_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 2e-5}), - PathAndInput({"tests/modules/mobilenet_v2_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 2e-5}), - PathAndInput({"tests/modules/efficientnet_b0_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 8e-3}), - PathAndInput({"tests/modules/bert_base_uncased_traced.jit.pt", {{1, 14}, {1, 14}}, {at::kInt, at::kInt}, 8e-2}), - PathAndInput({"tests/modules/vit_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 8e-2}))); - -#endif diff --git a/tests/cpp/test_module_fallback.cpp b/tests/cpp/test_module_fallback.cpp deleted file mode 100644 index d1221cde4d..0000000000 --- a/tests/cpp/test_module_fallback.cpp +++ /dev/null @@ -1,74 +0,0 @@ -#include -#include "gtest/gtest.h" -#include "tests/util/util.h" -#include "torch/script.h" -#include "torch_tensorrt/torch_tensorrt.h" - -#ifndef DISABLE_TEST_IN_CI - -TEST(CppAPITest, ResNetModuleFallbacksCorrectly) { - torch::jit::script::Module mod; - try { - mod = torch::jit::load("tests/modules/resnet18_scripted.jit.pt"); - } catch (const c10::Error& e) { - std::cerr << "error loading the model\n"; - ASSERT_TRUE(false); - } - - const std::vector> input_shapes = {{1, 3, 224, 224}}; - std::vector jit_inputs_ivalues; - std::vector trt_inputs_ivalues; - for (auto in_shape : input_shapes) { - auto in = at::randint(5, in_shape, {at::kCUDA}); - jit_inputs_ivalues.push_back(in.clone()); - trt_inputs_ivalues.push_back(in.clone()); - } - - torch_tensorrt::ts::CompileSpec cfg(input_shapes); - cfg.torch_executed_modules.push_back("torchvision.models.resnet.BasicBlock"); - - auto jit_results = mod.forward(jit_inputs_ivalues).toTensor(); - auto trt_mod = torch_tensorrt::ts::compile(mod, cfg); - auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor(); - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results, trt_results, 2e-6)); -} - -TEST(CppAPITest, MobileNetModuleFallbacksCorrectlyWithOneEngine) { - torch::jit::script::Module mod; - try { - mod = torch::jit::load("tests/modules/mobilenet_v2_scripted.jit.pt"); - } catch (const c10::Error& e) { - std::cerr << "error loading the model\n"; - ASSERT_TRUE(false); - } - - const std::vector> input_shapes = {{1, 3, 224, 224}}; - std::vector jit_inputs_ivalues; - std::vector trt_inputs_ivalues; - for (auto in_shape : input_shapes) { - auto in = at::randint(5, in_shape, {at::kCUDA}); - jit_inputs_ivalues.push_back(in.clone()); - trt_inputs_ivalues.push_back(in.clone()); - } - - torch_tensorrt::ts::CompileSpec cfg(input_shapes); - cfg.min_block_size = 5; - cfg.torch_executed_modules.push_back("torchvision.models.mobilenetv2.ConvBNActivation"); - - auto jit_results = mod.forward(jit_inputs_ivalues).toTensor(); - auto trt_mod = torch_tensorrt::ts::compile(mod, cfg); - - auto g = trt_mod.get_method("forward").graph(); - auto nodes = g->block()->nodes(); - std::size_t trt_count = 0; - for (const auto n : nodes) { - if (n->kind().toQualString() == std::string("tensorrt::execute_engine")) { - trt_count++; - } - } - ASSERT_TRUE(trt_count == 1); - - auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor(); - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results, trt_results, 2e-6)); -} -#endif diff --git a/tests/cpp/test_modules_as_engines.cpp b/tests/cpp/test_modules_as_engines.cpp index 4437b1218c..21670acdaf 100644 --- a/tests/cpp/test_modules_as_engines.cpp +++ b/tests/cpp/test_modules_as_engines.cpp @@ -15,40 +15,7 @@ TEST_P(CppAPITests, ModuleAsEngineIsClose) { auto trt_results = torch_tensorrt::tests::util::RunModuleForwardAsEngine(mod, inputs); ASSERT_TRUE( - torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0].reshape_as(jit_results[0]), threshold)); -} - -TEST_P(CppAPITests, ModuleToEngineToModuleIsClose) { - std::vector inputs; - std::vector inputs_ivalues; - for (uint64_t i = 0; i < input_shapes.size(); i++) { - inputs.push_back(at::randint(5, input_shapes[i], {at::kCUDA}).to(input_types[i])); - inputs_ivalues.push_back(inputs[inputs.size() - 1].clone()); - } - - torch::jit::IValue jit_results_ivalues = torch_tensorrt::tests::util::RunModuleForward(mod, inputs_ivalues); - std::vector jit_results; - jit_results.push_back(jit_results_ivalues.toTensor()); - - std::vector> input_ranges; - for (auto in : inputs) { - input_ranges.push_back(in.sizes()); - } - - auto compile_spec = torch_tensorrt::ts::CompileSpec({input_ranges}); - int device_id = 0; - cudaGetDevice(&device_id); - compile_spec.device.device_type = torch_tensorrt::Device::DeviceType::kGPU; - compile_spec.device.gpu_id = device_id; - auto engine = torch_tensorrt::ts::convert_method_to_trt_engine(mod, "forward", input_ranges); - auto trt_mod = torch_tensorrt::ts::embed_engine_in_new_module(engine, compile_spec.device); - - torch::jit::IValue trt_results_ivalues = torch_tensorrt::tests::util::RunModuleForward(trt_mod, inputs_ivalues); - std::vector trt_results; - trt_results.push_back(trt_results_ivalues.toTensor()); - - ASSERT_TRUE( - torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0].reshape_as(jit_results[0]), threshold)); + torch_tensorrt::tests::util::cosineSimEqual(jit_results[0], trt_results[0].reshape_as(jit_results[0]), threshold)); } #ifndef DISABLE_TEST_IN_CI @@ -57,12 +24,8 @@ INSTANTIATE_TEST_SUITE_P( ModuleAsEngineForwardIsCloseSuite, CppAPITests, testing::Values( - PathAndInput({"tests/modules/resnet18_traced.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 2e-5}), - PathAndInput({"tests/modules/resnet50_traced.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 2e-5}), - PathAndInput({"tests/modules/mobilenet_v2_traced.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 2e-5}), - PathAndInput({"tests/modules/resnet18_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 2e-5}), - PathAndInput({"tests/modules/resnet50_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 2e-5}), - PathAndInput({"tests/modules/mobilenet_v2_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 2e-5}), - PathAndInput({"tests/modules/efficientnet_b0_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 1e-4}), - PathAndInput({"tests/modules/vit_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 8e-2}))); + PathAndInput({"tests/modules/resnet50_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 0.99}), + PathAndInput({"tests/modules/mobilenet_v2_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 0.99}), + PathAndInput({"tests/modules/efficientnet_b0_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 0.99}), + PathAndInput({"tests/modules/vit_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 0.99}))); #endif diff --git a/tests/cpp/test_multi_gpu_serde.cpp b/tests/cpp/test_multi_gpu_serde.cpp index 8672ae9517..0b3944125b 100644 --- a/tests/cpp/test_multi_gpu_serde.cpp +++ b/tests/cpp/test_multi_gpu_serde.cpp @@ -23,12 +23,12 @@ TEST_P(CppAPITests, CompiledModuleIsClose) { trt_results.push_back(trt_results_ivalues.toTensor()); for (size_t i = 0; i < trt_results.size(); i++) { - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual( - jit_results[i], trt_results[i].reshape_as(jit_results[i]).to(torch::Device("cuda:0")), 2e-5)); + ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual( + jit_results[i], trt_results[i].reshape_as(jit_results[i]).to(torch::Device("cuda:0")), threshold)); } } INSTANTIATE_TEST_SUITE_P( CompiledModuleForwardIsCloseSuite, CppAPITests, - testing::Values(PathAndInput({"tests/modules/resnet18_traced.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 2e-5}))); + testing::Values(PathAndInput({"tests/modules/resnet18_traced.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 0.99}))); diff --git a/tests/cpp/test_multiple_registered_engines.cpp b/tests/cpp/test_multiple_registered_engines.cpp deleted file mode 100644 index 2746687f68..0000000000 --- a/tests/cpp/test_multiple_registered_engines.cpp +++ /dev/null @@ -1,68 +0,0 @@ -#include -#include "gtest/gtest.h" -#include "tests/util/util.h" -#include "torch/script.h" -#include "torch_tensorrt/torch_tensorrt.h" - -#ifndef DISABLE_TEST_IN_CI - -TEST(CppAPITest, CanRunMultipleEngines) { - torch::jit::script::Module mod1; - torch::jit::script::Module mod2; - try { - mod1 = torch::jit::load("tests/modules/resnet50_traced.jit.pt"); - mod2 = torch::jit::load("tests/modules/resnet18_traced.jit.pt"); - } catch (const c10::Error& e) { - std::cerr << "error loading the model\n"; - return; - } - - const std::vector> input_shapes = {{1, 3, 224, 224}}; - - std::vector jit1_inputs_ivalues; - std::vector trt1_inputs_ivalues; - for (auto in_shape : input_shapes) { - auto in = at::randint(5, in_shape, {at::kCUDA}); - jit1_inputs_ivalues.push_back(in.clone()); - trt1_inputs_ivalues.push_back(in.clone()); - } - - std::vector jit2_inputs_ivalues; - std::vector trt2_inputs_ivalues; - for (auto in_shape : input_shapes) { - auto in = at::randint(5, in_shape, {at::kCUDA}); - jit2_inputs_ivalues.push_back(in.clone()); - trt2_inputs_ivalues.push_back(in.clone()); - } - - torch::jit::IValue jit1_results_ivalues = torch_tensorrt::tests::util::RunModuleForward(mod1, jit1_inputs_ivalues); - std::vector jit1_results; - jit1_results.push_back(jit1_results_ivalues.toTensor()); - - torch::jit::IValue jit2_results_ivalues = torch_tensorrt::tests::util::RunModuleForward(mod2, jit2_inputs_ivalues); - std::vector jit2_results; - jit2_results.push_back(jit2_results_ivalues.toTensor()); - - auto trt_mod1 = torch_tensorrt::ts::compile(mod1, input_shapes); - torch::jit::IValue trt1_results_ivalues = - torch_tensorrt::tests::util::RunModuleForward(trt_mod1, trt1_inputs_ivalues); - std::vector trt1_results; - trt1_results.push_back(trt1_results_ivalues.toTensor()); - - auto trt_mod2 = torch_tensorrt::ts::compile(mod2, input_shapes); - torch::jit::IValue trt2_results_ivalues = - torch_tensorrt::tests::util::RunModuleForward(trt_mod2, trt2_inputs_ivalues); - std::vector trt2_results; - trt2_results.push_back(trt2_results_ivalues.toTensor()); - - for (size_t i = 0; i < trt1_results.size(); i++) { - ASSERT_TRUE( - torch_tensorrt::tests::util::almostEqual(jit1_results[i], trt1_results[i].reshape_as(jit1_results[i]), 2e-5)); - } - - for (size_t i = 0; i < trt2_results.size(); i++) { - ASSERT_TRUE( - torch_tensorrt::tests::util::almostEqual(jit2_results[i], trt2_results[i].reshape_as(jit2_results[i]), 2e-5)); - } -} -#endif diff --git a/tests/py/api/custom_models.py b/tests/py/api/custom_models.py new file mode 100644 index 0000000000..c6c0bb4c68 --- /dev/null +++ b/tests/py/api/custom_models.py @@ -0,0 +1,27 @@ +import torch +from transformers import BertModel, BertTokenizer, BertConfig + +def BertModule(): + model_name = "bert-base-uncased" + enc = BertTokenizer.from_pretrained(model_name) + text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]" + tokenized_text = enc.tokenize(text) + masked_index = 8 + tokenized_text[masked_index] = "[MASK]" + indexed_tokens = enc.convert_tokens_to_ids(tokenized_text) + segments_ids = [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1] + tokens_tensor = torch.tensor([indexed_tokens]) + segments_tensors = torch.tensor([segments_ids]) + config = BertConfig( + vocab_size_or_config_json_file=32000, + hidden_size=768, + num_hidden_layers=12, + num_attention_heads=12, + intermediate_size=3072, + torchscript=True, + ) + model = BertModel(config) + model.eval() + model = BertModel.from_pretrained(model_name, torchscript=True) + traced_model = torch.jit.trace(model, [tokens_tensor, segments_tensors]) + return traced_model diff --git a/tests/py/api/test_e2e_behavior.py b/tests/py/api/test_e2e_behavior.py index d1da3e0465..35cd3509dc 100644 --- a/tests/py/api/test_e2e_behavior.py +++ b/tests/py/api/test_e2e_behavior.py @@ -5,103 +5,6 @@ import copy from typing import Dict - -class TestCompileHalf(unittest.TestCase): - def test_compile_script_half(self): - self.model = models.resnet18(pretrained=True).eval().to("cuda") - self.input = torch.randn((1, 3, 224, 224)).to("cuda") - self.scripted_model = torch.jit.script(self.model) - self.scripted_model.half() - - compile_spec = { - "inputs": [torchtrt.Input(shape=self.input.shape, dtype=torch.half)], - "device": { - "device_type": torchtrt.DeviceType.GPU, - "gpu_id": 0, - }, - "enabled_precisions": {torch.half}, - } - - trt_mod = torchtrt.ts.compile(self.scripted_model, **compile_spec) - same = ( - (trt_mod(self.input.half()) - self.scripted_model(self.input.half())) - .abs() - .max() - ) - torchtrt.logging.log(torchtrt.logging.Level.Debug, "Max diff: " + str(same)) - self.assertTrue(same < 3e-2) - - def test_compile_script_half_by_default(self): - self.model = models.resnet18(pretrained=True).eval().to("cuda") - self.input = torch.randn((1, 3, 224, 224)).to("cuda") - self.scripted_model = torch.jit.script(self.model) - self.scripted_model.half() - - compile_spec = { - "inputs": [torchtrt.Input(shape=self.input.shape)], - "device": { - "device_type": torchtrt.DeviceType.GPU, - "gpu_id": 0, - }, - "enabled_precisions": {torch.float, torch.half}, - } - - trt_mod = torchtrt.ts.compile(self.scripted_model, **compile_spec) - same = ( - (trt_mod(self.input.half()) - self.scripted_model(self.input.half())) - .abs() - .max() - ) - torchtrt.logging.log(torchtrt.logging.Level.Debug, "Max diff: " + str(same)) - self.assertTrue(same < 3e-2) - - -class TestFallbackToTorch(unittest.TestCase): - def test_fallback(self): - self.model = models.resnet18(pretrained=True).eval().to("cuda") - self.input = torch.randn((1, 3, 224, 224)).to("cuda") - self.scripted_model = torch.jit.script(self.model) - - compile_spec = { - "inputs": [torchtrt.Input(self.input.shape)], - "device": { - "device_type": torchtrt.DeviceType.GPU, - "gpu_id": 0, - "allow_gpu_fallback": False, - "disable_tf32": False, - }, - "require_full_compilation": False, - "torch_executed_ops": ["aten::max_pool2d"], - "min_block_size": 1, - } - - trt_mod = torchtrt.ts.compile(self.scripted_model, **compile_spec) - same = (trt_mod(self.input) - self.scripted_model(self.input)).abs().max() - self.assertTrue(same < 2e-3) - - def test_module_fallback(self): - self.model = models.resnet18(pretrained=True).eval().to("cuda") - self.input = torch.randn((1, 3, 224, 224)).to("cuda") - self.scripted_model = torch.jit.script(self.model) - - compile_spec = { - "inputs": [torchtrt.Input(self.input.shape)], - "device": { - "device_type": torchtrt.DeviceType.GPU, - "gpu_id": 0, - "allow_gpu_fallback": False, - "disable_tf32": False, - }, - "require_full_compilation": False, - "torch_executed_modules": ["torchvision.models.resnet.BasicBlock"], - "min_block_size": 1, - } - - trt_mod = torchtrt.ts.compile(self.scripted_model, **compile_spec) - same = (trt_mod(self.input) - self.scripted_model(self.input)).abs().max() - self.assertTrue(same < 2e-3) - - class TestInputTypeDefaultsFP32Model(unittest.TestCase): def test_input_use_default_fp32(self): self.model = models.resnet18(pretrained=True).eval().to("cuda") diff --git a/tests/py/api/test_embed_engines.py b/tests/py/api/test_embed_engines.py new file mode 100644 index 0000000000..133c4c6a50 --- /dev/null +++ b/tests/py/api/test_embed_engines.py @@ -0,0 +1,60 @@ +import unittest +import torch_tensorrt as torchtrt +import torch +import torchvision.models as models +import copy +import timm +import custom_models as cm +from typing import Dict +from utils import cosine_similarity, COSINE_THRESHOLD + +class TestModelToEngineToModel(unittest.TestCase): + def test_resnet50(self): + self.model = models.resnet50(pretrained=True).eval().to("cuda") + self.input = torch.randn((1, 3, 224, 224)).to("cuda") + + compile_spec = { + "inputs": [ + torchtrt.Input( + self.input.shape, dtype=torch.float, format=torch.contiguous_format + ) + ], + "device": { + "device_type": torchtrt.DeviceType.GPU, + "gpu_id": 0, + }, + "enabled_precisions": {torch.float}, + } + + self.scripted_model = torch.jit.script(self.model) + trt_engine = torchtrt.ts.convert_method_to_trt_engine(self.scripted_model, "forward", **compile_spec) + trt_mod = torchtrt.ts.embed_engine_in_new_module(trt_engine) + cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) + self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"Resnet50 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + + def test_efficientnet_b0(self): + self.model = timm.create_model("efficientnet_b0", pretrained=True).eval().to("cuda") + self.input = torch.randn((1, 3, 224, 224)).to("cuda") + + compile_spec = { + "inputs": [ + torchtrt.Input( + self.input.shape, dtype=torch.float, format=torch.contiguous_format + ) + ], + "device": { + "device_type": torchtrt.DeviceType.GPU, + "gpu_id": 0, + }, + "enabled_precisions": {torch.float}, + } + + self.scripted_model = torch.jit.script(self.model) + trt_engine = torchtrt.ts.convert_method_to_trt_engine(self.scripted_model, "forward", **compile_spec) + trt_mod = torchtrt.ts.embed_engine_in_new_module(trt_engine) + + cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) + self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"EfficientNet-B0 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + +if __name__ == "__main__": + unittest.main() diff --git a/tests/py/api/test_models.py b/tests/py/api/test_models.py new file mode 100644 index 0000000000..84860b9305 --- /dev/null +++ b/tests/py/api/test_models.py @@ -0,0 +1,124 @@ +import unittest +import torch_tensorrt as torchtrt +import torch +import torchvision.models as models +import copy +import timm +import custom_models as cm +from typing import Dict +from utils import cosine_similarity, COSINE_THRESHOLD + +class TestModels(unittest.TestCase): + def test_resnet50(self): + self.model = models.resnet50(pretrained=True).eval().to("cuda") + self.input = torch.randn((1, 3, 224, 224)).to("cuda") + + compile_spec = { + "inputs": [ + torchtrt.Input( + self.input.shape, dtype=torch.float, format=torch.contiguous_format + ) + ], + "device": { + "device_type": torchtrt.DeviceType.GPU, + "gpu_id": 0, + }, + "enabled_precisions": {torch.float}, + } + + trt_mod = torchtrt.compile(self.model, **compile_spec) + cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) + self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"Resnet50 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + + def test_mobilenet_v2(self): + self.model = models.mobilenet_v2(pretrained=True).eval().to("cuda") + self.input = torch.randn((1, 3, 224, 224)).to("cuda") + + compile_spec = { + "inputs": [ + torchtrt.Input( + self.input.shape, dtype=torch.float, format=torch.contiguous_format + ) + ], + "device": { + "device_type": torchtrt.DeviceType.GPU, + "gpu_id": 0, + }, + "enabled_precisions": {torch.float}, + } + + trt_mod = torchtrt.compile(self.model, **compile_spec) + cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) + self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"Mobilenet v2 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + + def test_efficientnet_b0(self): + self.model = timm.create_model("efficientnet_b0", pretrained=True).eval().to("cuda") + self.input = torch.randn((1, 3, 224, 224)).to("cuda") + + compile_spec = { + "inputs": [ + torchtrt.Input( + self.input.shape, dtype=torch.float, format=torch.contiguous_format + ) + ], + "device": { + "device_type": torchtrt.DeviceType.GPU, + "gpu_id": 0, + }, + "enabled_precisions": {torch.float}, + } + + trt_mod = torchtrt.compile(self.model, **compile_spec) + cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) + self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"EfficientNet-B0 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + + def test_bert_base_uncased(self): + self.model = cm.BertModule().cuda() + self.input = torch.randint(0, 5, (1, 14), dtype=torch.int32).to("cuda") + + compile_spec = { + "inputs": [ + torchtrt.Input(self.input.shape, dtype=self.input.dtype, format=torch.contiguous_format), + torchtrt.Input(self.input.shape, dtype=self.input.dtype, format=torch.contiguous_format) + ], + "device": { + "device_type": torchtrt.DeviceType.GPU, + "gpu_id": 0, + }, + "enabled_precisions": {torch.float}, + "truncate_long_and_double": True, + } + with torchtrt.logging.errors(): + trt_mod = torchtrt.ts.compile(self.model, **compile_spec) + + model_outputs = self.model(self.input, self.input) + trt_model_outputs = trt_mod(self.input, self.input) + for out, trt_out in zip(model_outputs, trt_model_outputs): + cos_sim = cosine_similarity(out, trt_out) + self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"HF BERT base-uncased TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + + def test_resnet50_half(self): + self.model = models.resnet50(pretrained=True).eval().to("cuda") + self.input = torch.randn((1, 3, 224, 224)).to("cuda") + self.scripted_model = torch.jit.script(self.model) + self.scripted_model.half() + + compile_spec = { + "inputs": [ + torchtrt.Input( + self.input.shape, dtype=torch.half, format=torch.contiguous_format + ) + ], + "device": { + "device_type": torchtrt.DeviceType.GPU, + "gpu_id": 0, + }, + "enabled_precisions": {torch.half}, + } + + trt_mod = torchtrt.compile(self.scripted_model, **compile_spec) + cos_sim = cosine_similarity(self.model.half()(self.input.half()), trt_mod(self.input.half())) + self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"Resnet50 Half TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + +if __name__ == "__main__": + unittest.main() diff --git a/tests/py/api/test_module_fallback.py b/tests/py/api/test_module_fallback.py new file mode 100644 index 0000000000..5d5fc425c2 --- /dev/null +++ b/tests/py/api/test_module_fallback.py @@ -0,0 +1,53 @@ +import unittest +import torch_tensorrt as torchtrt +import torch +import torchvision.models as models +import copy +from typing import Dict +from utils import cosine_similarity, COSINE_THRESHOLD + +class TestModuleFallback(unittest.TestCase): + def test_fallback_resnet18(self): + self.model = models.resnet18(pretrained=True).eval().to("cuda") + self.input = torch.randn((1, 3, 224, 224)).to("cuda") + compile_spec = { + "inputs": [ + torchtrt.Input( + self.input.shape, dtype=torch.float, format=torch.contiguous_format + ) + ], + "device": { + "device_type": torchtrt.DeviceType.GPU, + "gpu_id": 0, + }, + "enabled_precisions": {torch.float}, + "torch_executed_modules": ["torchvision.models.resnet.BasicBlock"], + } + trt_mod = torchtrt.compile(self.model, **compile_spec) + cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) + self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"Resnet18 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + + def test_fallback_mobilenet_v2(self): + self.model = models.mobilenet_v2(pretrained=True).eval().to("cuda") + self.input = torch.randn((1, 3, 224, 224)).to("cuda") + compile_spec = { + "inputs": [ + torchtrt.Input( + self.input.shape, dtype=torch.float, format=torch.contiguous_format + ) + ], + "device": { + "device_type": torchtrt.DeviceType.GPU, + "gpu_id": 0, + }, + "enabled_precisions": {torch.float}, + "torch_executed_modules": ["torchvision.models.mobilenetv2.ConvBNActivation"], + "min_block_size": 5, + } + trt_mod = torchtrt.compile(self.model, **compile_spec) + cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) + self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"Mobilenet V2 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/py/api/test_multiple_registered_engines.py b/tests/py/api/test_multiple_registered_engines.py new file mode 100644 index 0000000000..fb201f9d8f --- /dev/null +++ b/tests/py/api/test_multiple_registered_engines.py @@ -0,0 +1,40 @@ +import unittest +import torch_tensorrt as torchtrt +import torch +import torchvision.models as models +import copy +import timm +import custom_models as cm +from typing import Dict +from utils import cosine_similarity, COSINE_THRESHOLD + +class TestModelToEngineToModel(unittest.TestCase): + def test_multiple_engines(self): + self.resnet18 = models.resnet18(pretrained=True).eval().to("cuda") + self.resnet50 = models.resnet50(pretrained=True).eval().to("cuda") + self.input1 = torch.randn((1, 3, 224, 224)).to("cuda") + self.input2 = torch.randn((1, 3, 224, 224)).to("cuda") + + compile_spec = { + "inputs": [ + torchtrt.Input( + self.input1.shape, dtype=torch.float, format=torch.contiguous_format + ) + ], + "device": { + "device_type": torchtrt.DeviceType.GPU, + "gpu_id": 0, + }, + "enabled_precisions": {torch.float}, + } + rn18_trt_mod = torchtrt.compile(self.resnet18, **compile_spec) + rn50_trt_mod = torchtrt.compile(self.resnet50, **compile_spec) + + cos_sim = cosine_similarity(self.resnet18(self.input1), rn18_trt_mod(self.input1)) + self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"Resnet18 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + + cos_sim = cosine_similarity(self.resnet50(self.input1), rn50_trt_mod(self.input1)) + self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"Resnet50 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + +if __name__ == "__main__": + unittest.main() diff --git a/tests/py/api/test_operator_fallback.py b/tests/py/api/test_operator_fallback.py new file mode 100644 index 0000000000..25d1b7cd92 --- /dev/null +++ b/tests/py/api/test_operator_fallback.py @@ -0,0 +1,52 @@ +import unittest +import torch_tensorrt as torchtrt +import torch +import torchvision.models as models +import copy +from typing import Dict +from utils import cosine_similarity, COSINE_THRESHOLD + +class TestFallbackModels(unittest.TestCase): + def test_fallback_resnet18(self): + self.model = models.resnet18(pretrained=True).eval().to("cuda") + self.input = torch.randn((1, 3, 224, 224)).to("cuda") + compile_spec = { + "inputs": [ + torchtrt.Input( + self.input.shape, dtype=torch.float, format=torch.contiguous_format + ) + ], + "device": { + "device_type": torchtrt.DeviceType.GPU, + "gpu_id": 0, + }, + "enabled_precisions": {torch.float}, + "torch_executed_ops": ["aten::add"], + } + trt_mod = torchtrt.compile(self.model, **compile_spec) + cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) + self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"Resnet18 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + + def test_fallback_mobilenet_v2(self): + self.model = models.mobilenet_v2(pretrained=True).eval().to("cuda") + self.input = torch.randn((1, 3, 224, 224)).to("cuda") + compile_spec = { + "inputs": [ + torchtrt.Input( + self.input.shape, dtype=torch.float, format=torch.contiguous_format + ) + ], + "device": { + "device_type": torchtrt.DeviceType.GPU, + "gpu_id": 0, + }, + "enabled_precisions": {torch.float}, + "torch_executed_ops": ["aten::hardtanh"], + } + trt_mod = torchtrt.compile(self.model, **compile_spec) + cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) + self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"Mobilenet V2 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/py/api/test_ts_backend.py b/tests/py/api/test_ts_backend.py index d0654a8f75..891f4ba178 100644 --- a/tests/py/api/test_ts_backend.py +++ b/tests/py/api/test_ts_backend.py @@ -4,7 +4,7 @@ import torchvision.models as models import copy from typing import Dict - +from utils import cosine_similarity, COSINE_THRESHOLD class TestCompile(unittest.TestCase): def test_compile_traced(self): @@ -26,8 +26,8 @@ def test_compile_traced(self): } trt_mod = torchtrt.ts.compile(self.traced_model, **compile_spec) - same = (trt_mod(self.input) - self.traced_model(self.input)).abs().max() - self.assertTrue(same < 2e-2) + cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) + self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"VGG16 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") def test_compile_script(self): self.model = models.vgg16(pretrained=True).eval().to("cuda") @@ -40,8 +40,8 @@ def test_compile_script(self): device=torchtrt.Device(gpu_id=0), enabled_precisions={torch.float}, ) - same = (trt_mod(self.input) - self.scripted_model(self.input)).abs().max() - self.assertTrue(same < 2e-2) + cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) + self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"VGG16 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") def test_compile_global(self): self.model = models.vgg16(pretrained=True).eval().to("cuda") @@ -53,21 +53,8 @@ def test_compile_global(self): device=torchtrt.Device(gpu_id=0), enabled_precisions={torch.float}, ) - same = (trt_mod(self.input) - self.scripted_model(self.input)).abs().max() - self.assertTrue(same < 2e-2) - - def test_compile_global_nn_mod(self): - self.model = models.vgg16(pretrained=True).eval().to("cuda") - self.input = torch.randn((1, 3, 224, 224)).to("cuda") - with torch.no_grad(): - trt_mod = torchtrt.compile( - self.model, - inputs=[self.input], - device=torchtrt.Device(gpu_id=0), - enabled_precisions={torch.float}, - ) - same = (trt_mod(self.input) - self.model(self.input)).abs().max() - self.assertTrue(same < 2e-2) + cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) + self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"VGG16 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") def test_from_torch_tensor(self): self.model = models.vgg16(pretrained=True).eval().to("cuda") @@ -83,8 +70,8 @@ def test_from_torch_tensor(self): } trt_mod = torchtrt.ts.compile(self.traced_model, **compile_spec) - same = (trt_mod(self.input) - self.traced_model(self.input)).abs().max() - self.assertTrue(same < 2e-2) + cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) + self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"VGG16 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") def test_device(self): self.model = models.vgg16(pretrained=True).eval().to("cuda") @@ -97,8 +84,8 @@ def test_device(self): } trt_mod = torchtrt.ts.compile(self.traced_model, **compile_spec) - same = (trt_mod(self.input) - self.traced_model(self.input)).abs().max() - self.assertTrue(same < 2e-2) + cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) + self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"VGG16 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") def test_default_device(self): self.model = models.vgg16(pretrained=True).eval().to("cuda") @@ -107,52 +94,8 @@ def test_default_device(self): compile_spec = {"inputs": [self.input], "enabled_precisions": {torch.float}} trt_mod = torchtrt.ts.compile(self.traced_model, **compile_spec) - same = (trt_mod(self.input) - self.traced_model(self.input)).abs().max() - self.assertTrue(same < 2e-2) - - def test_compile_script_from_dict(self): - self.model = models.vgg16(pretrained=True).eval().to("cuda") - self.input = torch.randn((1, 3, 224, 224)).to("cuda") - self.traced_model = torch.jit.trace(self.model, [self.input]) - compile_spec = { - "inputs": [torchtrt.Input(shape=self.input.shape)], - "device": { - "device_type": torchtrt.DeviceType.GPU, - "gpu_id": 0, - }, - "enabled_precisions": {torch.float}, - } - - trt_mod = torchtrt.ts.compile(self.traced_model, **compile_spec) - same = (trt_mod(self.input) - self.traced_model(self.input)).abs().max() - self.assertTrue(same < 2e-2) - - -class TestPTtoTRTtoPT(unittest.TestCase): - def test_pt_to_trt_to_pt(self): - self.model = models.vgg16(pretrained=True).eval().to("cuda") - self.input = torch.randn((1, 3, 224, 224)).to("cuda") - self.ts_model = torch.jit.trace(self.model, [self.input]) - - compile_spec = { - "inputs": [torchtrt.Input(self.input.shape)], - "device": { - "device_type": torchtrt.DeviceType.GPU, - "gpu_id": 0, - "allow_gpu_fallback": False, - "disable_tf32": False, - }, - } - - trt_engine = torchtrt.ts.convert_method_to_trt_engine( - self.ts_model, "forward", **compile_spec - ) - trt_mod = torchtrt.ts.embed_engine_in_new_module( - trt_engine, torchtrt.Device("cuda:0") - ) - same = (trt_mod(self.input) - self.ts_model(self.input)).abs().max() - self.assertTrue(same < 2e-3) - + cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) + self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"VGG16 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") class TestCheckMethodOpSupport(unittest.TestCase): def test_check_support(self): diff --git a/tests/py/api/utils.py b/tests/py/api/utils.py new file mode 100644 index 0000000000..e71bb09c6d --- /dev/null +++ b/tests/py/api/utils.py @@ -0,0 +1,9 @@ +import torch + +COSINE_THRESHOLD=0.99 + +def cosine_similarity(gt_tensor, pred_tensor): + res = torch.nn.functional.cosine_similarity(gt_tensor.flatten().to(torch.float32), pred_tensor.flatten().to(torch.float32), dim=0, eps=1e-6) + res = res.cpu().detach().item() + + return res diff --git a/tests/util/util.cpp b/tests/util/util.cpp index 13d0d18566..91004c06ff 100644 --- a/tests/util/util.cpp +++ b/tests/util/util.cpp @@ -1,10 +1,23 @@ #include "core/util/prelude.h" #include "torch/script.h" +#include "torch/torch.h" namespace torch_tensorrt { namespace tests { namespace util { +bool cosineSimEqual(const at::Tensor& computed_tensor, const at::Tensor& gt_tensor, float threshold = 0.99f){ + + torch::Tensor cosine_sim = torch::nn::functional::cosine_similarity(computed_tensor.flatten(), gt_tensor.flatten(), torch::nn::functional::CosineSimilarityFuncOptions().dim(0)); + std::ostringstream ss; + ss << computed_tensor << std::endl << gt_tensor << std::endl; + LOG_GRAPH(ss.str()); + LOG_GRAPH(std::string("Cosine Similarity score: ") + std::to_string(cosine_sim.item())); + LOG_GRAPH(std::string("Acceptable Threshold: ") + std::to_string(threshold)); + + return cosine_sim.item() >= threshold; +} + bool almostEqual(const at::Tensor& computed_tensor, const at::Tensor& gt_tensor, float atol = 1e-8, float rtol = 1e-5) { std::ostringstream ss; ss << computed_tensor << std::endl << gt_tensor << std::endl; diff --git a/tests/util/util.h b/tests/util/util.h index f39e2a5766..1ea62a16e0 100644 --- a/tests/util/util.h +++ b/tests/util/util.h @@ -11,6 +11,8 @@ namespace torch_tensorrt { namespace tests { namespace util { +bool cosineSimEqual(const at::Tensor& computed_tensor, const at::Tensor& gt_tensor, float threshold); + bool almostEqual(const at::Tensor& computed_tensor, const at::Tensor& gt_tensor, float atol = 1e-8, float rtol = 1e-5); bool exactlyEqual(const at::Tensor& a, const at::Tensor& b); From c6f3103cd3295f3be5c37e349ac5aa0a809bacb4 Mon Sep 17 00:00:00 2001 From: Dheeraj Peri Date: Wed, 31 Aug 2022 11:18:53 -0700 Subject: [PATCH 02/12] chore: move to cosine similarity comparison Signed-off-by: Dheeraj Peri --- tests/cpp/test_collections.cpp | 10 ++++---- tests/py/api/test_collections.py | 41 +++++++++++++------------------- tests/py/api/utils.py | 7 +++++- 3 files changed, 28 insertions(+), 30 deletions(-) diff --git a/tests/cpp/test_collections.cpp b/tests/cpp/test_collections.cpp index d01665adcd..3318aec99d 100644 --- a/tests/cpp/test_collections.cpp +++ b/tests/cpp/test_collections.cpp @@ -42,7 +42,7 @@ TEST(CppAPITests, TestCollectionStandardTensorInput) { auto trt_mod = torch_tensorrt::torchscript::compile(mod, compile_settings); auto trt_out = trt_mod.forward(inputs_); - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTensor(), trt_out.toTensor(), 1e-5)); + ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(out.toTensor(), trt_out.toTensor(), 0.99)); } TEST(CppAPITests, TestCollectionTupleInput) { @@ -85,7 +85,7 @@ TEST(CppAPITests, TestCollectionTupleInput) { auto trt_mod = torch_tensorrt::torchscript::compile(mod, compile_settings); auto trt_out = trt_mod.forward(complex_inputs); - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTensor(), trt_out.toTensor(), 1e-5)); + ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(out.toTensor(), trt_out.toTensor(), 0.99)); } TEST(CppAPITests, TestCollectionListInput) { @@ -144,7 +144,7 @@ TEST(CppAPITests, TestCollectionListInput) { LOG_DEBUG("Finish compile"); auto trt_out = trt_mod.forward(complex_inputs); - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTensor(), trt_out.toTensor(), 1e-5)); + ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(out.toTensor(), trt_out.toTensor(), 0.99)); } TEST(CppAPITests, TestCollectionTupleInputOutput) { @@ -192,7 +192,7 @@ TEST(CppAPITests, TestCollectionTupleInputOutput) { auto trt_out = trt_mod.forward(complex_inputs); ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual( - out.toTuple()->elements()[0].toTensor(), trt_out.toTuple()->elements()[0].toTensor(), 1e-5)); + out.toTuple()->elements()[0].toTensor(), trt_out.toTuple()->elements()[0].toTensor(), 1e-5)); ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual( out.toTuple()->elements()[1].toTensor(), trt_out.toTuple()->elements()[1].toTensor(), 1e-5)); } @@ -317,4 +317,4 @@ TEST(CppAPITests, TestCollectionComplexModel) { out.toTuple()->elements()[0].toTensor(), trt_out.toTuple()->elements()[0].toTensor(), 1e-5)); ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual( out.toTuple()->elements()[1].toTensor(), trt_out.toTuple()->elements()[1].toTensor(), 1e-5)); -} \ No newline at end of file +} diff --git a/tests/py/api/test_collections.py b/tests/py/api/test_collections.py index dfae3f18c9..88147e005e 100644 --- a/tests/py/api/test_collections.py +++ b/tests/py/api/test_collections.py @@ -3,6 +3,7 @@ import torch import torchvision.models as models import os +from utils import cosine_similarity, COSINE_THRESHOLD def find_repo_root(max_depth=10): @@ -40,12 +41,8 @@ def test_compile(self): } trt_mod = torchtrt.ts.compile(self.model, **compile_spec) - same = ( - (trt_mod(self.input, self.input) - self.model(self.input, self.input)) - .abs() - .max() - ) - self.assertTrue(same < 2e-2) + cos_sim = cosine_similarity(self.model(self.input, self.input), trt_mod(self.input, self.input)) + self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"standard_tensor_input_scripted TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") class TestTupleInput(unittest.TestCase): @@ -68,12 +65,8 @@ def test_compile(self): } trt_mod = torchtrt.ts.compile(self.model, **compile_spec) - same = ( - (trt_mod((self.input, self.input)) - self.model((self.input, self.input))) - .abs() - .max() - ) - self.assertTrue(same < 2e-2) + cos_sim = cosine_similarity(self.model((self.input, self.input)), trt_mod((self.input, self.input))) + self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"tuple_input_scripted TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") class TestListInput(unittest.TestCase): @@ -94,12 +87,8 @@ def test_compile(self): } trt_mod = torchtrt.ts.compile(self.model, **compile_spec) - same = ( - (trt_mod([self.input, self.input]) - self.model([self.input, self.input])) - .abs() - .max() - ) - self.assertTrue(same < 2e-2) + cos_sim = cosine_similarity(self.model([self.input, self.input]), trt_mod([self.input, self.input])) + self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"list_input_scripted TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") class TestTupleInputOutput(unittest.TestCase): @@ -124,8 +113,9 @@ def test_compile(self): trt_mod = torchtrt.ts.compile(self.model, **compile_spec) trt_out = trt_mod((self.input, self.input)) pyt_out = self.model((self.input, self.input)) - results = [(t - p).abs().max() < 2e-2 for (t, p) in zip(trt_out, pyt_out)] - self.assertTrue(all(results)) + for (t, p) in zip(trt_out, pyt_out): + cos_sim = cosine_similarity(t, p) + self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"tuple_input_output_scripted TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") class TestListInputOutput(unittest.TestCase): @@ -150,8 +140,10 @@ def test_compile(self): trt_mod = torchtrt.ts.compile(self.model, **compile_spec) trt_out = trt_mod((self.input, self.input)) pyt_out = self.model((self.input, self.input)) - results = [(t - p).abs().max() < 2e-2 for (t, p) in zip(trt_out, pyt_out)] - self.assertTrue(all(results)) + + for (t, p) in zip(trt_out, pyt_out): + cos_sim = cosine_similarity(t, p) + self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"list_input_output_scripted TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") class TestListInputTupleOutput(unittest.TestCase): @@ -176,8 +168,9 @@ def test_compile(self): trt_mod = torchtrt.ts.compile(self.model, **compile_spec) trt_out = trt_mod((self.input, self.input)) pyt_out = self.model((self.input, self.input)) - results = [(t - p).abs().max() < 2e-2 for (t, p) in zip(trt_out, pyt_out)] - self.assertTrue(all(results)) + for (t, p) in zip(trt_out, pyt_out): + cos_sim = cosine_similarity(t, p) + self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"list_input_tuple_output_scripted TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") if __name__ == "__main__": diff --git a/tests/py/api/utils.py b/tests/py/api/utils.py index e71bb09c6d..a43b54a4a7 100644 --- a/tests/py/api/utils.py +++ b/tests/py/api/utils.py @@ -3,7 +3,12 @@ COSINE_THRESHOLD=0.99 def cosine_similarity(gt_tensor, pred_tensor): - res = torch.nn.functional.cosine_similarity(gt_tensor.flatten().to(torch.float32), pred_tensor.flatten().to(torch.float32), dim=0, eps=1e-6) + gt_tensor = gt_tensor.flatten().to(torch.float32) + pred_tensor = pred_tensor.flatten().to(torch.float32) + if torch.sum(gt_tensor) == 0.0 or torch.sum(pred_tensor) == 0.0: + if torch.allclose(gt_tensor, pred_tensor, atol=1e-4, rtol=1e-4, equal_nan=True): + return 1.0 + res = torch.nn.functional.cosine_similarity(gt_tensor, pred_tensor, dim=0, eps=1e-6) res = res.cpu().detach().item() return res From beeac7cd39761cd4919652f6659f0cb659b29812 Mon Sep 17 00:00:00 2001 From: Dheeraj Peri Date: Wed, 31 Aug 2022 17:18:13 -0700 Subject: [PATCH 03/12] refactor: Refactor nox file testing Signed-off-by: Dheeraj Peri --- .circleci/config.yml | 1 + noxfile.py | 101 +++++++----------- py/torch_tensorrt/ts/_compile_spec.py | 6 +- tests/py/api/test_embed_engines.py | 1 - tests/py/hw/test_api_dla.py | 9 +- tests/py/hw/test_multi_gpu.py | 17 +-- tests/py/integrations/test_to_backend_api.py | 9 +- .../test_trt_intercompatibility.py | 6 +- tests/py/{api => models}/custom_models.py | 0 tests/py/{api => models}/test_models.py | 0 .../test_multiple_registered_engines.py | 0 tests/py/models/utils.py | 14 +++ 12 files changed, 75 insertions(+), 89 deletions(-) rename tests/py/{api => models}/custom_models.py (100%) rename tests/py/{api => models}/test_models.py (100%) rename tests/py/{api => models}/test_multiple_registered_engines.py (100%) create mode 100644 tests/py/models/utils.py diff --git a/.circleci/config.yml b/.circleci/config.yml index dcbc84cc9a..16dda8609f 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -435,6 +435,7 @@ commands: mkdir -p /tmp/artifacts/test_results cd tests/py pytest --junitxml=/tmp/artifacts/test_results/api/api_test_results.xml api/ + pytest --junitxml=/tmp/artifacts/test_results/models/models_test_results.xml models/ pytest --junitxml=/tmp/artifacts/test_results/integrations/integrations_test_results.xml integrations/ cd ~/project diff --git a/noxfile.py b/noxfile.py index 41926b5ee1..2b8e2da9b3 100644 --- a/noxfile.py +++ b/noxfile.py @@ -30,13 +30,15 @@ if USE_HOST_DEPS: print("Using dependencies from host python") +# Set epochs to train VGG model for accuracy tests +EPOCHS=25 + SUPPORTED_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10"] nox.options.sessions = [ "l0_api_tests-" + "{}.{}".format(sys.version_info.major, sys.version_info.minor) ] - def install_deps(session): print("Installing deps") session.install("-r", os.path.join(TOP_DIR, "py", "requirements.txt")) @@ -63,31 +65,6 @@ def install_torch_trt(session): session.run("python", "setup.py", "develop") -def download_datasets(session): - print( - "Downloading dataset to path", - os.path.join(TOP_DIR, "examples/int8/training/vgg16"), - ) - session.chdir(os.path.join(TOP_DIR, "examples/int8/training/vgg16")) - session.run_always( - "wget", "https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz", external=True - ) - session.run_always("tar", "-xvzf", "cifar-10-binary.tar.gz", external=True) - session.run_always( - "mkdir", - "-p", - os.path.join(TOP_DIR, "tests/accuracy/datasets/data"), - external=True, - ) - session.run_always( - "cp", - "-rpf", - os.path.join(TOP_DIR, "examples/int8/training/vgg16/cifar-10-batches-bin"), - os.path.join(TOP_DIR, "tests/accuracy/datasets/data/cidar-10-batches-bin"), - external=True, - ) - - def train_model(session): session.chdir(os.path.join(TOP_DIR, "examples/int8/training/vgg16")) session.install("-r", "requirements.txt") @@ -107,14 +84,14 @@ def train_model(session): "--ckpt-dir", "vgg16_ckpts", "--epochs", - "25", + str(EPOCHS), env={"PYTHONPATH": PYT_PATH}, ) session.run_always( "python", "export_ckpt.py", - "vgg16_ckpts/ckpt_epoch25.pth", + "vgg16_ckpts/ckpt_epoch" + str(EPOCHS) + ".pth", env={"PYTHONPATH": PYT_PATH}, ) else: @@ -130,10 +107,10 @@ def train_model(session): "--ckpt-dir", "vgg16_ckpts", "--epochs", - "25", + str(EPOCHS), ) - session.run_always("python", "export_ckpt.py", "vgg16_ckpts/ckpt_epoch25.pth") + session.run_always("python", "export_ckpt.py", "vgg16_ckpts/ckpt_epoch" + str(EPOCHS) + ".pth") def finetune_model(session): @@ -156,9 +133,9 @@ def finetune_model(session): "--ckpt-dir", "vgg16_ckpts", "--start-from", - "25", + str(EPOCHS), "--epochs", - "26", + str(EPOCHS+1), env={"PYTHONPATH": PYT_PATH}, ) @@ -166,7 +143,7 @@ def finetune_model(session): session.run_always( "python", "export_qat.py", - "vgg16_ckpts/ckpt_epoch26.pth", + "vgg16_ckpts/ckpt_epoch" + str(EPOCHS+1) + ".pth", env={"PYTHONPATH": PYT_PATH}, ) else: @@ -182,13 +159,13 @@ def finetune_model(session): "--ckpt-dir", "vgg16_ckpts", "--start-from", - "25", + str(EPOCHS), "--epochs", - "26", + str(EPOCHS+1), ) # Export model - session.run_always("python", "export_qat.py", "vgg16_ckpts/ckpt_epoch26.pth") + session.run_always("python", "export_qat.py", "vgg16_ckpts/ckpt_epoch" + str(EPOCHS+1) + ".pth") def cleanup(session): @@ -209,7 +186,7 @@ def run_base_tests(session): print("Running basic tests") session.chdir(os.path.join(TOP_DIR, "tests/py")) tests = [ - "api", + "api/test_e2e_behavior.py", "integrations/test_to_backend_api.py", ] for test in tests: @@ -218,6 +195,18 @@ def run_base_tests(session): else: session.run_always("pytest", test) +def run_model_tests(session): + print("Running model tests") + session.chdir(os.path.join(TOP_DIR, "tests/py")) + tests = [ + "models", + ] + for test in tests: + if USE_HOST_DEPS: + session.run_always("pytest", test, env={"PYTHONPATH": PYT_PATH}) + else: + session.run_always("pytest", test) + def run_accuracy_tests(session): print("Running accuracy tests") @@ -268,8 +257,8 @@ def run_trt_compatibility_tests(session): copy_model(session) session.chdir(os.path.join(TOP_DIR, "tests/py")) tests = [ - "test_trt_intercompatibility.py", - "test_ptq_trt_calibrator.py", + "integrations/test_trt_intercompatibility.py", + #"ptq/test_ptq_trt_calibrator.py", ] for test in tests: if USE_HOST_DEPS: @@ -282,7 +271,7 @@ def run_dla_tests(session): print("Running DLA tests") session.chdir(os.path.join(TOP_DIR, "tests/py")) tests = [ - "test_api_dla.py", + "hw/test_api_dla.py", ] for test in tests: if USE_HOST_DEPS: @@ -295,7 +284,7 @@ def run_multi_gpu_tests(session): print("Running multi GPU tests") session.chdir(os.path.join(TOP_DIR, "tests/py")) tests = [ - "test_multi_gpu.py", + "hw/test_multi_gpu.py", ] for test in tests: if USE_HOST_DEPS: @@ -321,22 +310,18 @@ def run_l0_dla_tests(session): run_base_tests(session) cleanup(session) - -def run_l1_accuracy_tests(session): +def run_l1_model_tests(session): if not USE_HOST_DEPS: install_deps(session) install_torch_trt(session) - download_datasets(session) - train_model(session) - run_accuracy_tests(session) + download_models(session) + run_model_tests(session) cleanup(session) - def run_l1_int8_accuracy_tests(session): if not USE_HOST_DEPS: install_deps(session) install_torch_trt(session) - download_datasets(session) train_model(session) finetune_model(session) run_int8_accuracy_tests(session) @@ -348,7 +333,6 @@ def run_l2_trt_compatibility_tests(session): install_deps(session) install_torch_trt(session) download_models(session) - download_datasets(session) train_model(session) run_trt_compatibility_tests(session) cleanup(session) @@ -368,18 +352,15 @@ def l0_api_tests(session): """When a developer needs to check correctness for a PR or something""" run_l0_api_tests(session) - @nox.session(python=SUPPORTED_PYTHON_VERSIONS, reuse_venv=True) def l0_dla_tests(session): """When a developer needs to check basic api functionality using host dependencies""" run_l0_dla_tests(session) - @nox.session(python=SUPPORTED_PYTHON_VERSIONS, reuse_venv=True) -def l1_accuracy_tests(session): - """Checking accuracy performance on various usecases""" - run_l1_accuracy_tests(session) - +def l1_model_tests(session): + """When a developer needs to check correctness for a PR or something""" + run_l1_model_tests(session) @nox.session(python=SUPPORTED_PYTHON_VERSIONS, reuse_venv=True) def l1_int8_accuracy_tests(session): @@ -397,13 +378,3 @@ def l2_trt_compatibility_tests(session): def l2_multi_gpu_tests(session): """Makes sure that Torch-TensorRT can operate on multi-gpu systems""" run_l2_multi_gpu_tests(session) - - -@nox.session(python=SUPPORTED_PYTHON_VERSIONS, reuse_venv=True) -def download_test_models(session): - """Grab all the models needed for testing""" - try: - import torch - except ModuleNotFoundError: - install_deps(session) - download_models(session) diff --git a/py/torch_tensorrt/ts/_compile_spec.py b/py/torch_tensorrt/ts/_compile_spec.py index 154b29dd7b..8f24bc76ad 100644 --- a/py/torch_tensorrt/ts/_compile_spec.py +++ b/py/torch_tensorrt/ts/_compile_spec.py @@ -225,8 +225,8 @@ def _parse_input_signature(input_signature: Any): def _parse_compile_spec(compile_spec_: Dict[str, Any]) -> _ts_C.CompileSpec: - # TODO: Remove deep copy once collections does not need partial compilation - compile_spec = deepcopy(compile_spec_) + # TODO: Use deepcopy to support partial compilation of collections + compile_spec = compile_spec_ info = _ts_C.CompileSpec() if len(compile_spec["inputs"]) > 0: @@ -301,7 +301,7 @@ def _parse_compile_spec(compile_spec_: Dict[str, Any]) -> _ts_C.CompileSpec: compile_spec["enabled_precisions"] ) - if "calibrator" in compile_spec: + if "calibrator" in compile_spec and compile_spec["calibrator"]: info.ptq_calibrator = compile_spec["calibrator"] if "sparse_weights" in compile_spec: diff --git a/tests/py/api/test_embed_engines.py b/tests/py/api/test_embed_engines.py index 133c4c6a50..15bbffa62b 100644 --- a/tests/py/api/test_embed_engines.py +++ b/tests/py/api/test_embed_engines.py @@ -4,7 +4,6 @@ import torchvision.models as models import copy import timm -import custom_models as cm from typing import Dict from utils import cosine_similarity, COSINE_THRESHOLD diff --git a/tests/py/hw/test_api_dla.py b/tests/py/hw/test_api_dla.py index 57b149faa7..ae6005bb1b 100644 --- a/tests/py/hw/test_api_dla.py +++ b/tests/py/hw/test_api_dla.py @@ -2,6 +2,7 @@ import torch_tensorrt as torchtrt import torch import torchvision.models as models +from utils import cosine_similarity, COSINE_THRESHOLD class ModelTestCaseOnDLA(unittest.TestCase): @@ -39,8 +40,8 @@ def test_compile_traced(self): } trt_mod = torchtrt.ts.compile(self.traced_model, **compile_spec) - same = (trt_mod(self.input) - self.traced_model(self.input)).abs().max() - self.assertTrue(same < 2e-2) + cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) + self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"ModelTestCaseOnDLA traced TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") def test_compile_script(self): compile_spec = { @@ -55,8 +56,8 @@ def test_compile_script(self): } trt_mod = torchtrt.ts.compile(self.scripted_model, **compile_spec) - same = (trt_mod(self.input) - self.scripted_model(self.input)).abs().max() - self.assertTrue(same < 2e-2) + cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) + self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"ModelTestCaseOnDLA scripted TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") def test_suite(): diff --git a/tests/py/hw/test_multi_gpu.py b/tests/py/hw/test_multi_gpu.py index c068cc71b0..033404c927 100644 --- a/tests/py/hw/test_multi_gpu.py +++ b/tests/py/hw/test_multi_gpu.py @@ -35,9 +35,9 @@ def test_compile_traced(self): trt_mod = torchtrt.ts.compile(self.traced_model, **compile_spec) torchtrt.set_device(self.target_gpu) - same = (trt_mod(self.input) - self.traced_model(self.input)).abs().max() + cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) torchtrt.set_device(0) - self.assertTrue(same < 2e-3) + self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"TestMultiGpuSwitching traced TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") def test_compile_script(self): torchtrt.set_device(0) @@ -54,9 +54,10 @@ def test_compile_script(self): trt_mod = torchtrt.ts.compile(self.scripted_model, **compile_spec) torchtrt.set_device(self.target_gpu) - same = (trt_mod(self.input) - self.scripted_model(self.input)).abs().max() + cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) torchtrt.set_device(0) - self.assertTrue(same < 2e-3) + self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"TestMultiGpuSwitching scripted TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + class TestMultiGpuSerializeDeserializeSwitching(ModelTestCase): @@ -89,8 +90,8 @@ def test_compile_traced(self): trt_mod = torchtrt.ts.compile(self.traced_model, **compile_spec) # Changing the device ID deliberately. It should still run on correct device ID by context switching torchtrt.set_device(1) - same = (trt_mod(self.input) - self.traced_model(self.input)).abs().max() - self.assertTrue(same < 2e-3) + cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) + self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"TestMultiGpuSerializeDeserializeSwitching traced TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") def test_compile_script(self): torchtrt.set_device(0) @@ -108,8 +109,8 @@ def test_compile_script(self): trt_mod = torchtrt.ts.compile(self.scripted_model, **compile_spec) # Changing the device ID deliberately. It should still run on correct device ID by context switching torchtrt.set_device(1) - same = (trt_mod(self.input) - self.scripted_model(self.input)).abs().max() - self.assertTrue(same < 2e-3) + cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) + self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"TestMultiGpuSerializeDeserializeSwitching scripted TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") def test_suite(): diff --git a/tests/py/integrations/test_to_backend_api.py b/tests/py/integrations/test_to_backend_api.py index 16d839b1b0..b860d0333c 100644 --- a/tests/py/integrations/test_to_backend_api.py +++ b/tests/py/integrations/test_to_backend_api.py @@ -2,7 +2,7 @@ import torch_tensorrt as torchtrt import torch import torchvision.models as models - +from utils import cosine_similarity, COSINE_THRESHOLD class TestToBackendLowering(unittest.TestCase): def setUp(self): @@ -31,10 +31,9 @@ def setUp(self): def test_to_backend_lowering(self): trt_mod = torch._C._jit_to_backend("tensorrt", self.scripted_model, self.spec) - same = ( - (trt_mod.forward(self.input) - self.scripted_model(self.input)).abs().max() - ) - self.assertTrue(same < 2e-3) + cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) + self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"TestToBackendLowering TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + if __name__ == "__main__": diff --git a/tests/py/integrations/test_trt_intercompatibility.py b/tests/py/integrations/test_trt_intercompatibility.py index 96b47b7ccc..e82f1e54ca 100644 --- a/tests/py/integrations/test_trt_intercompatibility.py +++ b/tests/py/integrations/test_trt_intercompatibility.py @@ -3,7 +3,7 @@ import torch import torchvision.models as models import tensorrt as trt - +from utils import cosine_similarity, COSINE_THRESHOLD class TestPyTorchToTRTEngine(unittest.TestCase): def test_pt_to_trt(self): @@ -42,8 +42,8 @@ def test_pt_to_trt(self): device="cuda:0" ).cuda_stream, ) - same = (out - self.ts_model(self.input)).abs().max() - self.assertTrue(same < 2e-3) + cos_sim = cosine_similarity(self.model(self.input), out) + self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"TestPyTorchToTRTEngine TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") if __name__ == "__main__": diff --git a/tests/py/api/custom_models.py b/tests/py/models/custom_models.py similarity index 100% rename from tests/py/api/custom_models.py rename to tests/py/models/custom_models.py diff --git a/tests/py/api/test_models.py b/tests/py/models/test_models.py similarity index 100% rename from tests/py/api/test_models.py rename to tests/py/models/test_models.py diff --git a/tests/py/api/test_multiple_registered_engines.py b/tests/py/models/test_multiple_registered_engines.py similarity index 100% rename from tests/py/api/test_multiple_registered_engines.py rename to tests/py/models/test_multiple_registered_engines.py diff --git a/tests/py/models/utils.py b/tests/py/models/utils.py new file mode 100644 index 0000000000..a43b54a4a7 --- /dev/null +++ b/tests/py/models/utils.py @@ -0,0 +1,14 @@ +import torch + +COSINE_THRESHOLD=0.99 + +def cosine_similarity(gt_tensor, pred_tensor): + gt_tensor = gt_tensor.flatten().to(torch.float32) + pred_tensor = pred_tensor.flatten().to(torch.float32) + if torch.sum(gt_tensor) == 0.0 or torch.sum(pred_tensor) == 0.0: + if torch.allclose(gt_tensor, pred_tensor, atol=1e-4, rtol=1e-4, equal_nan=True): + return 1.0 + res = torch.nn.functional.cosine_similarity(gt_tensor, pred_tensor, dim=0, eps=1e-6) + res = res.cpu().detach().item() + + return res From 7e6b36cf0e87922232a86751deba379bd387d0c4 Mon Sep 17 00:00:00 2001 From: Dheeraj Peri Date: Wed, 31 Aug 2022 17:22:21 -0700 Subject: [PATCH 04/12] chore: add missing scripts Signed-off-by: Dheeraj Peri --- tests/py/hw/utils.py | 14 ++++++++++++++ tests/py/integrations/utils.py | 14 ++++++++++++++ tests/py/utils.py | 14 ++++++++++++++ 3 files changed, 42 insertions(+) create mode 100644 tests/py/hw/utils.py create mode 100644 tests/py/integrations/utils.py create mode 100644 tests/py/utils.py diff --git a/tests/py/hw/utils.py b/tests/py/hw/utils.py new file mode 100644 index 0000000000..a43b54a4a7 --- /dev/null +++ b/tests/py/hw/utils.py @@ -0,0 +1,14 @@ +import torch + +COSINE_THRESHOLD=0.99 + +def cosine_similarity(gt_tensor, pred_tensor): + gt_tensor = gt_tensor.flatten().to(torch.float32) + pred_tensor = pred_tensor.flatten().to(torch.float32) + if torch.sum(gt_tensor) == 0.0 or torch.sum(pred_tensor) == 0.0: + if torch.allclose(gt_tensor, pred_tensor, atol=1e-4, rtol=1e-4, equal_nan=True): + return 1.0 + res = torch.nn.functional.cosine_similarity(gt_tensor, pred_tensor, dim=0, eps=1e-6) + res = res.cpu().detach().item() + + return res diff --git a/tests/py/integrations/utils.py b/tests/py/integrations/utils.py new file mode 100644 index 0000000000..a43b54a4a7 --- /dev/null +++ b/tests/py/integrations/utils.py @@ -0,0 +1,14 @@ +import torch + +COSINE_THRESHOLD=0.99 + +def cosine_similarity(gt_tensor, pred_tensor): + gt_tensor = gt_tensor.flatten().to(torch.float32) + pred_tensor = pred_tensor.flatten().to(torch.float32) + if torch.sum(gt_tensor) == 0.0 or torch.sum(pred_tensor) == 0.0: + if torch.allclose(gt_tensor, pred_tensor, atol=1e-4, rtol=1e-4, equal_nan=True): + return 1.0 + res = torch.nn.functional.cosine_similarity(gt_tensor, pred_tensor, dim=0, eps=1e-6) + res = res.cpu().detach().item() + + return res diff --git a/tests/py/utils.py b/tests/py/utils.py new file mode 100644 index 0000000000..a43b54a4a7 --- /dev/null +++ b/tests/py/utils.py @@ -0,0 +1,14 @@ +import torch + +COSINE_THRESHOLD=0.99 + +def cosine_similarity(gt_tensor, pred_tensor): + gt_tensor = gt_tensor.flatten().to(torch.float32) + pred_tensor = pred_tensor.flatten().to(torch.float32) + if torch.sum(gt_tensor) == 0.0 or torch.sum(pred_tensor) == 0.0: + if torch.allclose(gt_tensor, pred_tensor, atol=1e-4, rtol=1e-4, equal_nan=True): + return 1.0 + res = torch.nn.functional.cosine_similarity(gt_tensor, pred_tensor, dim=0, eps=1e-6) + res = res.cpu().detach().item() + + return res From ed75e9da13596e6bf11f5b1f6402392865f12159 Mon Sep 17 00:00:00 2001 From: Dheeraj Peri Date: Wed, 31 Aug 2022 17:32:32 -0700 Subject: [PATCH 05/12] chore: Linter fixes Signed-off-by: Dheeraj Peri --- .github/workflows/docgen.yml | 2 +- .github/workflows/linter.yml | 2 +- noxfile.py | 27 ++++++++--- tests/cpp/test_collections.cpp | 2 +- tests/cpp/test_modules_as_engines.cpp | 4 +- tests/py/api/test_collections.py | 42 +++++++++++++---- tests/py/api/test_e2e_behavior.py | 1 + tests/py/api/test_embed_engines.py | 24 ++++++++-- tests/py/api/test_module_fallback.py | 15 ++++-- tests/py/api/test_operator_fallback.py | 11 ++++- tests/py/api/test_ts_backend.py | 32 ++++++++++--- tests/py/api/utils.py | 3 +- tests/py/hw/test_api_dla.py | 10 +++- tests/py/hw/test_multi_gpu.py | 21 +++++++-- tests/py/hw/utils.py | 3 +- tests/py/integrations/test_to_backend_api.py | 7 ++- .../test_trt_intercompatibility.py | 6 ++- tests/py/integrations/utils.py | 3 +- tests/py/models/custom_models.py | 1 + tests/py/models/test_models.py | 47 +++++++++++++++---- .../test_multiple_registered_engines.py | 20 ++++++-- tests/py/models/utils.py | 3 +- tests/py/utils.py | 3 +- tests/util/util.cpp | 6 +-- 24 files changed, 227 insertions(+), 68 deletions(-) diff --git a/.github/workflows/docgen.yml b/.github/workflows/docgen.yml index 7b66b98be5..61af5bc5d9 100644 --- a/.github/workflows/docgen.yml +++ b/.github/workflows/docgen.yml @@ -31,7 +31,7 @@ jobs: - name: Set up Python 3.9.4 uses: actions/setup-python@v2 with: - python-version: 3.9.4 + python-version: 3.9.4 - uses: actions/checkout@v2 with: ref: ${{github.head_ref}} diff --git a/.github/workflows/linter.yml b/.github/workflows/linter.yml index 58c8440684..b56a233169 100644 --- a/.github/workflows/linter.yml +++ b/.github/workflows/linter.yml @@ -39,7 +39,7 @@ jobs: pip3 install -r $GITHUB_WORKSPACE/.github/scripts/requirements.txt pip3 install -r $GITHUB_WORKSPACE/requirements-dev.txt - name: Lint C++ - run: | + run: | cd $GITHUB_WORKSPACE python3 $GITHUB_WORKSPACE/.github/scripts/run_cpp_linter.py env: diff --git a/noxfile.py b/noxfile.py index 2b8e2da9b3..1f7c1433af 100644 --- a/noxfile.py +++ b/noxfile.py @@ -31,7 +31,7 @@ print("Using dependencies from host python") # Set epochs to train VGG model for accuracy tests -EPOCHS=25 +EPOCHS = 25 SUPPORTED_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10"] @@ -39,6 +39,7 @@ "l0_api_tests-" + "{}.{}".format(sys.version_info.major, sys.version_info.minor) ] + def install_deps(session): print("Installing deps") session.install("-r", os.path.join(TOP_DIR, "py", "requirements.txt")) @@ -110,7 +111,9 @@ def train_model(session): str(EPOCHS), ) - session.run_always("python", "export_ckpt.py", "vgg16_ckpts/ckpt_epoch" + str(EPOCHS) + ".pth") + session.run_always( + "python", "export_ckpt.py", "vgg16_ckpts/ckpt_epoch" + str(EPOCHS) + ".pth" + ) def finetune_model(session): @@ -135,7 +138,7 @@ def finetune_model(session): "--start-from", str(EPOCHS), "--epochs", - str(EPOCHS+1), + str(EPOCHS + 1), env={"PYTHONPATH": PYT_PATH}, ) @@ -143,7 +146,7 @@ def finetune_model(session): session.run_always( "python", "export_qat.py", - "vgg16_ckpts/ckpt_epoch" + str(EPOCHS+1) + ".pth", + "vgg16_ckpts/ckpt_epoch" + str(EPOCHS + 1) + ".pth", env={"PYTHONPATH": PYT_PATH}, ) else: @@ -161,11 +164,15 @@ def finetune_model(session): "--start-from", str(EPOCHS), "--epochs", - str(EPOCHS+1), + str(EPOCHS + 1), ) # Export model - session.run_always("python", "export_qat.py", "vgg16_ckpts/ckpt_epoch" + str(EPOCHS+1) + ".pth") + session.run_always( + "python", + "export_qat.py", + "vgg16_ckpts/ckpt_epoch" + str(EPOCHS + 1) + ".pth", + ) def cleanup(session): @@ -195,6 +202,7 @@ def run_base_tests(session): else: session.run_always("pytest", test) + def run_model_tests(session): print("Running model tests") session.chdir(os.path.join(TOP_DIR, "tests/py")) @@ -258,7 +266,7 @@ def run_trt_compatibility_tests(session): session.chdir(os.path.join(TOP_DIR, "tests/py")) tests = [ "integrations/test_trt_intercompatibility.py", - #"ptq/test_ptq_trt_calibrator.py", + # "ptq/test_ptq_trt_calibrator.py", ] for test in tests: if USE_HOST_DEPS: @@ -310,6 +318,7 @@ def run_l0_dla_tests(session): run_base_tests(session) cleanup(session) + def run_l1_model_tests(session): if not USE_HOST_DEPS: install_deps(session) @@ -318,6 +327,7 @@ def run_l1_model_tests(session): run_model_tests(session) cleanup(session) + def run_l1_int8_accuracy_tests(session): if not USE_HOST_DEPS: install_deps(session) @@ -352,16 +362,19 @@ def l0_api_tests(session): """When a developer needs to check correctness for a PR or something""" run_l0_api_tests(session) + @nox.session(python=SUPPORTED_PYTHON_VERSIONS, reuse_venv=True) def l0_dla_tests(session): """When a developer needs to check basic api functionality using host dependencies""" run_l0_dla_tests(session) + @nox.session(python=SUPPORTED_PYTHON_VERSIONS, reuse_venv=True) def l1_model_tests(session): """When a developer needs to check correctness for a PR or something""" run_l1_model_tests(session) + @nox.session(python=SUPPORTED_PYTHON_VERSIONS, reuse_venv=True) def l1_int8_accuracy_tests(session): """Checking accuracy performance on various usecases""" diff --git a/tests/cpp/test_collections.cpp b/tests/cpp/test_collections.cpp index 3318aec99d..e3f0d91dfe 100644 --- a/tests/cpp/test_collections.cpp +++ b/tests/cpp/test_collections.cpp @@ -192,7 +192,7 @@ TEST(CppAPITests, TestCollectionTupleInputOutput) { auto trt_out = trt_mod.forward(complex_inputs); ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual( - out.toTuple()->elements()[0].toTensor(), trt_out.toTuple()->elements()[0].toTensor(), 1e-5)); + out.toTuple()->elements()[0].toTensor(), trt_out.toTuple()->elements()[0].toTensor(), 1e-5)); ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual( out.toTuple()->elements()[1].toTensor(), trt_out.toTuple()->elements()[1].toTensor(), 1e-5)); } diff --git a/tests/cpp/test_modules_as_engines.cpp b/tests/cpp/test_modules_as_engines.cpp index 21670acdaf..430ce8201e 100644 --- a/tests/cpp/test_modules_as_engines.cpp +++ b/tests/cpp/test_modules_as_engines.cpp @@ -14,8 +14,8 @@ TEST_P(CppAPITests, ModuleAsEngineIsClose) { jit_results.push_back(jit_results_ivalues.toTensor()); auto trt_results = torch_tensorrt::tests::util::RunModuleForwardAsEngine(mod, inputs); - ASSERT_TRUE( - torch_tensorrt::tests::util::cosineSimEqual(jit_results[0], trt_results[0].reshape_as(jit_results[0]), threshold)); + ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual( + jit_results[0], trt_results[0].reshape_as(jit_results[0]), threshold)); } #ifndef DISABLE_TEST_IN_CI diff --git a/tests/py/api/test_collections.py b/tests/py/api/test_collections.py index 88147e005e..936a4d5c73 100644 --- a/tests/py/api/test_collections.py +++ b/tests/py/api/test_collections.py @@ -41,8 +41,13 @@ def test_compile(self): } trt_mod = torchtrt.ts.compile(self.model, **compile_spec) - cos_sim = cosine_similarity(self.model(self.input, self.input), trt_mod(self.input, self.input)) - self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"standard_tensor_input_scripted TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + cos_sim = cosine_similarity( + self.model(self.input, self.input), trt_mod(self.input, self.input) + ) + self.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"standard_tensor_input_scripted TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) class TestTupleInput(unittest.TestCase): @@ -65,8 +70,13 @@ def test_compile(self): } trt_mod = torchtrt.ts.compile(self.model, **compile_spec) - cos_sim = cosine_similarity(self.model((self.input, self.input)), trt_mod((self.input, self.input))) - self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"tuple_input_scripted TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + cos_sim = cosine_similarity( + self.model((self.input, self.input)), trt_mod((self.input, self.input)) + ) + self.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"tuple_input_scripted TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) class TestListInput(unittest.TestCase): @@ -87,8 +97,13 @@ def test_compile(self): } trt_mod = torchtrt.ts.compile(self.model, **compile_spec) - cos_sim = cosine_similarity(self.model([self.input, self.input]), trt_mod([self.input, self.input])) - self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"list_input_scripted TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + cos_sim = cosine_similarity( + self.model([self.input, self.input]), trt_mod([self.input, self.input]) + ) + self.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"list_input_scripted TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) class TestTupleInputOutput(unittest.TestCase): @@ -115,7 +130,10 @@ def test_compile(self): pyt_out = self.model((self.input, self.input)) for (t, p) in zip(trt_out, pyt_out): cos_sim = cosine_similarity(t, p) - self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"tuple_input_output_scripted TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + self.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"tuple_input_output_scripted TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) class TestListInputOutput(unittest.TestCase): @@ -143,7 +161,10 @@ def test_compile(self): for (t, p) in zip(trt_out, pyt_out): cos_sim = cosine_similarity(t, p) - self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"list_input_output_scripted TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + self.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"list_input_output_scripted TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) class TestListInputTupleOutput(unittest.TestCase): @@ -170,7 +191,10 @@ def test_compile(self): pyt_out = self.model((self.input, self.input)) for (t, p) in zip(trt_out, pyt_out): cos_sim = cosine_similarity(t, p) - self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"list_input_tuple_output_scripted TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + self.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"list_input_tuple_output_scripted TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) if __name__ == "__main__": diff --git a/tests/py/api/test_e2e_behavior.py b/tests/py/api/test_e2e_behavior.py index 35cd3509dc..385fe916f4 100644 --- a/tests/py/api/test_e2e_behavior.py +++ b/tests/py/api/test_e2e_behavior.py @@ -5,6 +5,7 @@ import copy from typing import Dict + class TestInputTypeDefaultsFP32Model(unittest.TestCase): def test_input_use_default_fp32(self): self.model = models.resnet18(pretrained=True).eval().to("cuda") diff --git a/tests/py/api/test_embed_engines.py b/tests/py/api/test_embed_engines.py index 15bbffa62b..d21e139eca 100644 --- a/tests/py/api/test_embed_engines.py +++ b/tests/py/api/test_embed_engines.py @@ -7,6 +7,7 @@ from typing import Dict from utils import cosine_similarity, COSINE_THRESHOLD + class TestModelToEngineToModel(unittest.TestCase): def test_resnet50(self): self.model = models.resnet50(pretrained=True).eval().to("cuda") @@ -26,13 +27,20 @@ def test_resnet50(self): } self.scripted_model = torch.jit.script(self.model) - trt_engine = torchtrt.ts.convert_method_to_trt_engine(self.scripted_model, "forward", **compile_spec) + trt_engine = torchtrt.ts.convert_method_to_trt_engine( + self.scripted_model, "forward", **compile_spec + ) trt_mod = torchtrt.ts.embed_engine_in_new_module(trt_engine) cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) - self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"Resnet50 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + self.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"Resnet50 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) def test_efficientnet_b0(self): - self.model = timm.create_model("efficientnet_b0", pretrained=True).eval().to("cuda") + self.model = ( + timm.create_model("efficientnet_b0", pretrained=True).eval().to("cuda") + ) self.input = torch.randn((1, 3, 224, 224)).to("cuda") compile_spec = { @@ -49,11 +57,17 @@ def test_efficientnet_b0(self): } self.scripted_model = torch.jit.script(self.model) - trt_engine = torchtrt.ts.convert_method_to_trt_engine(self.scripted_model, "forward", **compile_spec) + trt_engine = torchtrt.ts.convert_method_to_trt_engine( + self.scripted_model, "forward", **compile_spec + ) trt_mod = torchtrt.ts.embed_engine_in_new_module(trt_engine) cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) - self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"EfficientNet-B0 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + self.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"EfficientNet-B0 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) + if __name__ == "__main__": unittest.main() diff --git a/tests/py/api/test_module_fallback.py b/tests/py/api/test_module_fallback.py index 5d5fc425c2..5eda2cdbfc 100644 --- a/tests/py/api/test_module_fallback.py +++ b/tests/py/api/test_module_fallback.py @@ -6,6 +6,7 @@ from typing import Dict from utils import cosine_similarity, COSINE_THRESHOLD + class TestModuleFallback(unittest.TestCase): def test_fallback_resnet18(self): self.model = models.resnet18(pretrained=True).eval().to("cuda") @@ -25,7 +26,10 @@ def test_fallback_resnet18(self): } trt_mod = torchtrt.compile(self.model, **compile_spec) cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) - self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"Resnet18 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + self.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"Resnet18 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) def test_fallback_mobilenet_v2(self): self.model = models.mobilenet_v2(pretrained=True).eval().to("cuda") @@ -41,12 +45,17 @@ def test_fallback_mobilenet_v2(self): "gpu_id": 0, }, "enabled_precisions": {torch.float}, - "torch_executed_modules": ["torchvision.models.mobilenetv2.ConvBNActivation"], + "torch_executed_modules": [ + "torchvision.models.mobilenetv2.ConvBNActivation" + ], "min_block_size": 5, } trt_mod = torchtrt.compile(self.model, **compile_spec) cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) - self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"Mobilenet V2 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + self.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"Mobilenet V2 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) if __name__ == "__main__": diff --git a/tests/py/api/test_operator_fallback.py b/tests/py/api/test_operator_fallback.py index 25d1b7cd92..302a663e24 100644 --- a/tests/py/api/test_operator_fallback.py +++ b/tests/py/api/test_operator_fallback.py @@ -6,6 +6,7 @@ from typing import Dict from utils import cosine_similarity, COSINE_THRESHOLD + class TestFallbackModels(unittest.TestCase): def test_fallback_resnet18(self): self.model = models.resnet18(pretrained=True).eval().to("cuda") @@ -25,7 +26,10 @@ def test_fallback_resnet18(self): } trt_mod = torchtrt.compile(self.model, **compile_spec) cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) - self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"Resnet18 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + self.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"Resnet18 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) def test_fallback_mobilenet_v2(self): self.model = models.mobilenet_v2(pretrained=True).eval().to("cuda") @@ -45,7 +49,10 @@ def test_fallback_mobilenet_v2(self): } trt_mod = torchtrt.compile(self.model, **compile_spec) cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) - self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"Mobilenet V2 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + self.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"Mobilenet V2 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) if __name__ == "__main__": diff --git a/tests/py/api/test_ts_backend.py b/tests/py/api/test_ts_backend.py index 891f4ba178..e56ab4f902 100644 --- a/tests/py/api/test_ts_backend.py +++ b/tests/py/api/test_ts_backend.py @@ -6,6 +6,7 @@ from typing import Dict from utils import cosine_similarity, COSINE_THRESHOLD + class TestCompile(unittest.TestCase): def test_compile_traced(self): self.model = models.vgg16(pretrained=True).eval().to("cuda") @@ -27,7 +28,10 @@ def test_compile_traced(self): trt_mod = torchtrt.ts.compile(self.traced_model, **compile_spec) cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) - self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"VGG16 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + self.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"VGG16 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) def test_compile_script(self): self.model = models.vgg16(pretrained=True).eval().to("cuda") @@ -41,7 +45,10 @@ def test_compile_script(self): enabled_precisions={torch.float}, ) cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) - self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"VGG16 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + self.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"VGG16 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) def test_compile_global(self): self.model = models.vgg16(pretrained=True).eval().to("cuda") @@ -54,7 +61,10 @@ def test_compile_global(self): enabled_precisions={torch.float}, ) cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) - self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"VGG16 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + self.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"VGG16 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) def test_from_torch_tensor(self): self.model = models.vgg16(pretrained=True).eval().to("cuda") @@ -71,7 +81,10 @@ def test_from_torch_tensor(self): trt_mod = torchtrt.ts.compile(self.traced_model, **compile_spec) cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) - self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"VGG16 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + self.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"VGG16 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) def test_device(self): self.model = models.vgg16(pretrained=True).eval().to("cuda") @@ -85,7 +98,10 @@ def test_device(self): trt_mod = torchtrt.ts.compile(self.traced_model, **compile_spec) cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) - self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"VGG16 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + self.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"VGG16 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) def test_default_device(self): self.model = models.vgg16(pretrained=True).eval().to("cuda") @@ -95,7 +111,11 @@ def test_default_device(self): trt_mod = torchtrt.ts.compile(self.traced_model, **compile_spec) cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) - self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"VGG16 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + self.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"VGG16 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) + class TestCheckMethodOpSupport(unittest.TestCase): def test_check_support(self): diff --git a/tests/py/api/utils.py b/tests/py/api/utils.py index a43b54a4a7..b1e6632ec3 100644 --- a/tests/py/api/utils.py +++ b/tests/py/api/utils.py @@ -1,6 +1,7 @@ import torch -COSINE_THRESHOLD=0.99 +COSINE_THRESHOLD = 0.99 + def cosine_similarity(gt_tensor, pred_tensor): gt_tensor = gt_tensor.flatten().to(torch.float32) diff --git a/tests/py/hw/test_api_dla.py b/tests/py/hw/test_api_dla.py index ae6005bb1b..5328b92233 100644 --- a/tests/py/hw/test_api_dla.py +++ b/tests/py/hw/test_api_dla.py @@ -41,7 +41,10 @@ def test_compile_traced(self): trt_mod = torchtrt.ts.compile(self.traced_model, **compile_spec) cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) - self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"ModelTestCaseOnDLA traced TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + self.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"ModelTestCaseOnDLA traced TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) def test_compile_script(self): compile_spec = { @@ -57,7 +60,10 @@ def test_compile_script(self): trt_mod = torchtrt.ts.compile(self.scripted_model, **compile_spec) cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) - self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"ModelTestCaseOnDLA scripted TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + self.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"ModelTestCaseOnDLA scripted TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) def test_suite(): diff --git a/tests/py/hw/test_multi_gpu.py b/tests/py/hw/test_multi_gpu.py index 033404c927..b6fa3f220b 100644 --- a/tests/py/hw/test_multi_gpu.py +++ b/tests/py/hw/test_multi_gpu.py @@ -37,7 +37,10 @@ def test_compile_traced(self): torchtrt.set_device(self.target_gpu) cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) torchtrt.set_device(0) - self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"TestMultiGpuSwitching traced TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + self.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"TestMultiGpuSwitching traced TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) def test_compile_script(self): torchtrt.set_device(0) @@ -56,8 +59,10 @@ def test_compile_script(self): torchtrt.set_device(self.target_gpu) cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) torchtrt.set_device(0) - self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"TestMultiGpuSwitching scripted TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") - + self.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"TestMultiGpuSwitching scripted TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) class TestMultiGpuSerializeDeserializeSwitching(ModelTestCase): @@ -91,7 +96,10 @@ def test_compile_traced(self): # Changing the device ID deliberately. It should still run on correct device ID by context switching torchtrt.set_device(1) cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) - self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"TestMultiGpuSerializeDeserializeSwitching traced TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + self.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"TestMultiGpuSerializeDeserializeSwitching traced TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) def test_compile_script(self): torchtrt.set_device(0) @@ -110,7 +118,10 @@ def test_compile_script(self): # Changing the device ID deliberately. It should still run on correct device ID by context switching torchtrt.set_device(1) cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) - self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"TestMultiGpuSerializeDeserializeSwitching scripted TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + self.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"TestMultiGpuSerializeDeserializeSwitching scripted TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) def test_suite(): diff --git a/tests/py/hw/utils.py b/tests/py/hw/utils.py index a43b54a4a7..b1e6632ec3 100644 --- a/tests/py/hw/utils.py +++ b/tests/py/hw/utils.py @@ -1,6 +1,7 @@ import torch -COSINE_THRESHOLD=0.99 +COSINE_THRESHOLD = 0.99 + def cosine_similarity(gt_tensor, pred_tensor): gt_tensor = gt_tensor.flatten().to(torch.float32) diff --git a/tests/py/integrations/test_to_backend_api.py b/tests/py/integrations/test_to_backend_api.py index b860d0333c..0f74a3af15 100644 --- a/tests/py/integrations/test_to_backend_api.py +++ b/tests/py/integrations/test_to_backend_api.py @@ -4,6 +4,7 @@ import torchvision.models as models from utils import cosine_similarity, COSINE_THRESHOLD + class TestToBackendLowering(unittest.TestCase): def setUp(self): self.input = torch.randn((1, 3, 300, 300)).to("cuda") @@ -32,8 +33,10 @@ def setUp(self): def test_to_backend_lowering(self): trt_mod = torch._C._jit_to_backend("tensorrt", self.scripted_model, self.spec) cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) - self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"TestToBackendLowering TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") - + self.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"TestToBackendLowering TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) if __name__ == "__main__": diff --git a/tests/py/integrations/test_trt_intercompatibility.py b/tests/py/integrations/test_trt_intercompatibility.py index e82f1e54ca..b938e4a1ac 100644 --- a/tests/py/integrations/test_trt_intercompatibility.py +++ b/tests/py/integrations/test_trt_intercompatibility.py @@ -5,6 +5,7 @@ import tensorrt as trt from utils import cosine_similarity, COSINE_THRESHOLD + class TestPyTorchToTRTEngine(unittest.TestCase): def test_pt_to_trt(self): self.model = models.resnet18(pretrained=True).eval().to("cuda:0") @@ -43,7 +44,10 @@ def test_pt_to_trt(self): ).cuda_stream, ) cos_sim = cosine_similarity(self.model(self.input), out) - self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"TestPyTorchToTRTEngine TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + self.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"TestPyTorchToTRTEngine TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) if __name__ == "__main__": diff --git a/tests/py/integrations/utils.py b/tests/py/integrations/utils.py index a43b54a4a7..b1e6632ec3 100644 --- a/tests/py/integrations/utils.py +++ b/tests/py/integrations/utils.py @@ -1,6 +1,7 @@ import torch -COSINE_THRESHOLD=0.99 +COSINE_THRESHOLD = 0.99 + def cosine_similarity(gt_tensor, pred_tensor): gt_tensor = gt_tensor.flatten().to(torch.float32) diff --git a/tests/py/models/custom_models.py b/tests/py/models/custom_models.py index c6c0bb4c68..a19b9ca81c 100644 --- a/tests/py/models/custom_models.py +++ b/tests/py/models/custom_models.py @@ -1,6 +1,7 @@ import torch from transformers import BertModel, BertTokenizer, BertConfig + def BertModule(): model_name = "bert-base-uncased" enc = BertTokenizer.from_pretrained(model_name) diff --git a/tests/py/models/test_models.py b/tests/py/models/test_models.py index 84860b9305..97a454c610 100644 --- a/tests/py/models/test_models.py +++ b/tests/py/models/test_models.py @@ -8,6 +8,7 @@ from typing import Dict from utils import cosine_similarity, COSINE_THRESHOLD + class TestModels(unittest.TestCase): def test_resnet50(self): self.model = models.resnet50(pretrained=True).eval().to("cuda") @@ -28,7 +29,10 @@ def test_resnet50(self): trt_mod = torchtrt.compile(self.model, **compile_spec) cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) - self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"Resnet50 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + self.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"Resnet50 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) def test_mobilenet_v2(self): self.model = models.mobilenet_v2(pretrained=True).eval().to("cuda") @@ -49,10 +53,15 @@ def test_mobilenet_v2(self): trt_mod = torchtrt.compile(self.model, **compile_spec) cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) - self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"Mobilenet v2 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + self.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"Mobilenet v2 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) def test_efficientnet_b0(self): - self.model = timm.create_model("efficientnet_b0", pretrained=True).eval().to("cuda") + self.model = ( + timm.create_model("efficientnet_b0", pretrained=True).eval().to("cuda") + ) self.input = torch.randn((1, 3, 224, 224)).to("cuda") compile_spec = { @@ -70,7 +79,10 @@ def test_efficientnet_b0(self): trt_mod = torchtrt.compile(self.model, **compile_spec) cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input)) - self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"EfficientNet-B0 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + self.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"EfficientNet-B0 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) def test_bert_base_uncased(self): self.model = cm.BertModule().cuda() @@ -78,8 +90,16 @@ def test_bert_base_uncased(self): compile_spec = { "inputs": [ - torchtrt.Input(self.input.shape, dtype=self.input.dtype, format=torch.contiguous_format), - torchtrt.Input(self.input.shape, dtype=self.input.dtype, format=torch.contiguous_format) + torchtrt.Input( + self.input.shape, + dtype=self.input.dtype, + format=torch.contiguous_format, + ), + torchtrt.Input( + self.input.shape, + dtype=self.input.dtype, + format=torch.contiguous_format, + ), ], "device": { "device_type": torchtrt.DeviceType.GPU, @@ -95,7 +115,10 @@ def test_bert_base_uncased(self): trt_model_outputs = trt_mod(self.input, self.input) for out, trt_out in zip(model_outputs, trt_model_outputs): cos_sim = cosine_similarity(out, trt_out) - self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"HF BERT base-uncased TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + self.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"HF BERT base-uncased TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) def test_resnet50_half(self): self.model = models.resnet50(pretrained=True).eval().to("cuda") @@ -117,8 +140,14 @@ def test_resnet50_half(self): } trt_mod = torchtrt.compile(self.scripted_model, **compile_spec) - cos_sim = cosine_similarity(self.model.half()(self.input.half()), trt_mod(self.input.half())) - self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"Resnet50 Half TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + cos_sim = cosine_similarity( + self.model.half()(self.input.half()), trt_mod(self.input.half()) + ) + self.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"Resnet50 Half TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) + if __name__ == "__main__": unittest.main() diff --git a/tests/py/models/test_multiple_registered_engines.py b/tests/py/models/test_multiple_registered_engines.py index fb201f9d8f..98f012597b 100644 --- a/tests/py/models/test_multiple_registered_engines.py +++ b/tests/py/models/test_multiple_registered_engines.py @@ -8,6 +8,7 @@ from typing import Dict from utils import cosine_similarity, COSINE_THRESHOLD + class TestModelToEngineToModel(unittest.TestCase): def test_multiple_engines(self): self.resnet18 = models.resnet18(pretrained=True).eval().to("cuda") @@ -30,11 +31,22 @@ def test_multiple_engines(self): rn18_trt_mod = torchtrt.compile(self.resnet18, **compile_spec) rn50_trt_mod = torchtrt.compile(self.resnet50, **compile_spec) - cos_sim = cosine_similarity(self.resnet18(self.input1), rn18_trt_mod(self.input1)) - self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"Resnet18 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") + cos_sim = cosine_similarity( + self.resnet18(self.input1), rn18_trt_mod(self.input1) + ) + self.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"Resnet18 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) + + cos_sim = cosine_similarity( + self.resnet50(self.input1), rn50_trt_mod(self.input1) + ) + self.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"Resnet50 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) - cos_sim = cosine_similarity(self.resnet50(self.input1), rn50_trt_mod(self.input1)) - self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"Resnet50 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}") if __name__ == "__main__": unittest.main() diff --git a/tests/py/models/utils.py b/tests/py/models/utils.py index a43b54a4a7..b1e6632ec3 100644 --- a/tests/py/models/utils.py +++ b/tests/py/models/utils.py @@ -1,6 +1,7 @@ import torch -COSINE_THRESHOLD=0.99 +COSINE_THRESHOLD = 0.99 + def cosine_similarity(gt_tensor, pred_tensor): gt_tensor = gt_tensor.flatten().to(torch.float32) diff --git a/tests/py/utils.py b/tests/py/utils.py index a43b54a4a7..b1e6632ec3 100644 --- a/tests/py/utils.py +++ b/tests/py/utils.py @@ -1,6 +1,7 @@ import torch -COSINE_THRESHOLD=0.99 +COSINE_THRESHOLD = 0.99 + def cosine_similarity(gt_tensor, pred_tensor): gt_tensor = gt_tensor.flatten().to(torch.float32) diff --git a/tests/util/util.cpp b/tests/util/util.cpp index 91004c06ff..8359d31576 100644 --- a/tests/util/util.cpp +++ b/tests/util/util.cpp @@ -6,9 +6,9 @@ namespace torch_tensorrt { namespace tests { namespace util { -bool cosineSimEqual(const at::Tensor& computed_tensor, const at::Tensor& gt_tensor, float threshold = 0.99f){ - - torch::Tensor cosine_sim = torch::nn::functional::cosine_similarity(computed_tensor.flatten(), gt_tensor.flatten(), torch::nn::functional::CosineSimilarityFuncOptions().dim(0)); +bool cosineSimEqual(const at::Tensor& computed_tensor, const at::Tensor& gt_tensor, float threshold = 0.99f) { + torch::Tensor cosine_sim = torch::nn::functional::cosine_similarity( + computed_tensor.flatten(), gt_tensor.flatten(), torch::nn::functional::CosineSimilarityFuncOptions().dim(0)); std::ostringstream ss; ss << computed_tensor << std::endl << gt_tensor << std::endl; LOG_GRAPH(ss.str()); From 3da78e984f0a13e9fea4068c2e60067c4374930d Mon Sep 17 00:00:00 2001 From: Dheeraj Peri Date: Thu, 1 Sep 2022 09:14:10 -0700 Subject: [PATCH 06/12] chore: Minor fix Signed-off-by: Dheeraj Peri --- noxfile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/noxfile.py b/noxfile.py index 1f7c1433af..0f6235f612 100644 --- a/noxfile.py +++ b/noxfile.py @@ -193,7 +193,7 @@ def run_base_tests(session): print("Running basic tests") session.chdir(os.path.join(TOP_DIR, "tests/py")) tests = [ - "api/test_e2e_behavior.py", + "api", "integrations/test_to_backend_api.py", ] for test in tests: @@ -371,7 +371,7 @@ def l0_dla_tests(session): @nox.session(python=SUPPORTED_PYTHON_VERSIONS, reuse_venv=True) def l1_model_tests(session): - """When a developer needs to check correctness for a PR or something""" + """When a user needs to test the functionality of standard models compilation and results""" run_l1_model_tests(session) From 0ca049f672a16f20f47f9c334f070b3b275b6bef Mon Sep 17 00:00:00 2001 From: Dheeraj Peri Date: Thu, 1 Sep 2022 09:21:35 -0700 Subject: [PATCH 07/12] chore: use rn18 instead of rn50 Signed-off-by: Dheeraj Peri --- tests/cpp/test_modules_as_engines.cpp | 2 +- tests/py/models/test_models.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/cpp/test_modules_as_engines.cpp b/tests/cpp/test_modules_as_engines.cpp index 430ce8201e..11b7a54fb0 100644 --- a/tests/cpp/test_modules_as_engines.cpp +++ b/tests/cpp/test_modules_as_engines.cpp @@ -24,7 +24,7 @@ INSTANTIATE_TEST_SUITE_P( ModuleAsEngineForwardIsCloseSuite, CppAPITests, testing::Values( - PathAndInput({"tests/modules/resnet50_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 0.99}), + PathAndInput({"tests/modules/resnet18_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 0.99}), PathAndInput({"tests/modules/mobilenet_v2_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 0.99}), PathAndInput({"tests/modules/efficientnet_b0_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 0.99}), PathAndInput({"tests/modules/vit_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 0.99}))); diff --git a/tests/py/models/test_models.py b/tests/py/models/test_models.py index 97a454c610..6cc9759626 100644 --- a/tests/py/models/test_models.py +++ b/tests/py/models/test_models.py @@ -10,8 +10,8 @@ class TestModels(unittest.TestCase): - def test_resnet50(self): - self.model = models.resnet50(pretrained=True).eval().to("cuda") + def test_resnet18(self): + self.model = models.resnet18(pretrained=True).eval().to("cuda") self.input = torch.randn((1, 3, 224, 224)).to("cuda") compile_spec = { @@ -120,8 +120,8 @@ def test_bert_base_uncased(self): msg=f"HF BERT base-uncased TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", ) - def test_resnet50_half(self): - self.model = models.resnet50(pretrained=True).eval().to("cuda") + def test_resnet18_half(self): + self.model = models.resnet18(pretrained=True).eval().to("cuda") self.input = torch.randn((1, 3, 224, 224)).to("cuda") self.scripted_model = torch.jit.script(self.model) self.scripted_model.half() From c8640963e8e614bd30dd42a43c86091e0c31ed89 Mon Sep 17 00:00:00 2001 From: Dheeraj Peri Date: Thu, 1 Sep 2022 13:28:02 -0700 Subject: [PATCH 08/12] chore: Add cpp tests with cosine sim Signed-off-by: Dheeraj Peri --- tests/core/partitioning/BUILD | 16 ++++ .../test_fallback_graph_output.cpp | 69 +++++++++++++++++ tests/cpp/BUILD | 47 ++++++++++++ tests/cpp/test_compiled_modules.cpp | 60 +++++++++++++++ tests/cpp/test_module_fallback.cpp | 74 +++++++++++++++++++ .../cpp/test_multiple_registered_engines.cpp | 66 +++++++++++++++++ 6 files changed, 332 insertions(+) create mode 100644 tests/core/partitioning/test_fallback_graph_output.cpp create mode 100644 tests/cpp/test_compiled_modules.cpp create mode 100644 tests/cpp/test_module_fallback.cpp create mode 100644 tests/cpp/test_multiple_registered_engines.cpp diff --git a/tests/core/partitioning/BUILD b/tests/core/partitioning/BUILD index 5f90be2972..83722b4271 100644 --- a/tests/core/partitioning/BUILD +++ b/tests/core/partitioning/BUILD @@ -55,6 +55,21 @@ cc_test( }), ) +cc_test( + name = "test_fallback_graph_output", + srcs = ["test_fallback_graph_output.cpp"], + data = [ + ":jit_models", + ], + deps = [ + "//tests/util", + "@googletest//:gtest_main", + ] + select({ + ":use_pre_cxx11_abi": ["@libtorch_pre_cxx11_abi//:libtorch"], + "//conditions:default": ["@libtorch//:libtorch"], + }), +) + cc_test( name = "test_loop_fallback", srcs = ["test_loop_fallback.cpp"], @@ -89,6 +104,7 @@ test_suite( name = "partitioning_tests", tests = [ ":test_conditionals", + ":test_fallback_graph_output", ":test_loading_model", ":test_loop_fallback", ":test_resolve_nontensor_inputs", diff --git a/tests/core/partitioning/test_fallback_graph_output.cpp b/tests/core/partitioning/test_fallback_graph_output.cpp new file mode 100644 index 0000000000..3da717074a --- /dev/null +++ b/tests/core/partitioning/test_fallback_graph_output.cpp @@ -0,0 +1,69 @@ +#include +#include +#include "core/compiler.h" +#include "gtest/gtest.h" +#include "tests/util/util.h" +#include "torch/script.h" + +#ifndef DISABLE_TEST_IN_CI + +TEST(Partitioning, ComputeResNet50FallbackGraphCorrectly) { + torch::jit::script::Module mod; + try { + mod = torch::jit::load("tests/modules/resnet50_traced.jit.pt"); + } catch (const c10::Error& e) { + std::cerr << "error loading the model\n"; + return; + } + + const std::vector> input_shapes = {{1, 3, 224, 224}}; + std::vector jit_inputs_ivalues; + std::vector trt_inputs_ivalues; + for (auto in_shape : input_shapes) { + auto in = at::randint(5, in_shape, {at::kCUDA}); + jit_inputs_ivalues.push_back(in.clone()); + trt_inputs_ivalues.push_back(in.clone()); + } + + std::vector input_ranges{torch_tensorrt::core::ir::Input({1, 3, 224, 224})}; + + torch_tensorrt::core::CompileSpec cfg(input_ranges); + cfg.partition_info.enabled = true; + cfg.partition_info.forced_fallback_operators.push_back("aten::add"); + + auto jit_results = mod.forward(jit_inputs_ivalues).toTensor(); + auto trt_mod = torch_tensorrt::core::CompileGraph(mod, cfg); + auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor(); + ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results, 0.99)); +} + +TEST(Partitioning, ComputeMobileNetFallbackGraphCorrectly) { + torch::jit::script::Module mod; + try { + mod = torch::jit::load("tests/modules/mobilenet_v2_traced.jit.pt"); + } catch (const c10::Error& e) { + std::cerr << "error loading the model\n"; + return; + } + + const std::vector> input_shapes = {{1, 3, 224, 224}}; + std::vector jit_inputs_ivalues; + std::vector trt_inputs_ivalues; + for (auto in_shape : input_shapes) { + auto in = at::randint(5, in_shape, {at::kCUDA}); + jit_inputs_ivalues.push_back(in.clone()); + trt_inputs_ivalues.push_back(in.clone()); + } + + std::vector input_ranges{torch_tensorrt::core::ir::Input({1, 3, 224, 224})}; + auto g = mod.get_method("forward").graph(); + torch_tensorrt::core::CompileSpec cfg(input_ranges); + cfg.partition_info.enabled = true; + cfg.partition_info.forced_fallback_operators.push_back("aten::hardtanh"); + + auto jit_results = mod.forward(jit_inputs_ivalues).toTensor(); + auto trt_mod = torch_tensorrt::core::CompileGraph(mod, cfg); + auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor(); + ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results, 0.99)); +} +#endif diff --git a/tests/cpp/BUILD b/tests/cpp/BUILD index ea2c6ae752..3d56682189 100644 --- a/tests/cpp/BUILD +++ b/tests/cpp/BUILD @@ -13,9 +13,12 @@ test_suite( name = "api_tests", tests = [ ":test_collections", + ":test_compiled_modules", ":test_default_input_types", ":test_example_tensors", + ":test_module_fallback", ":test_modules_as_engines", + ":test_multiple_registered_engines", ":test_runtime_thread_safety", ":test_serialization", ], @@ -25,9 +28,12 @@ test_suite( name = "aarch64_api_tests", tests = [ ":test_collections", + ":test_compiled_modules", ":test_default_input_types", ":test_example_tensors", + ":test_module_fallback", ":test_modules_as_engines", + ":test_multiple_registered_engines", ":test_runtime_thread_safety", ":test_serialization", ], @@ -66,6 +72,21 @@ cc_test( ], ) +cc_test( + name = "test_multiple_registered_engines", + srcs = ["test_multiple_registered_engines.cpp"], + data = [ + "//tests/modules:jit_models", + ], + deps = [ + "//tests/util", + "@googletest//:gtest_main", + ] + select({ + ":use_pre_cxx11_abi": ["@libtorch_pre_cxx11_abi//:libtorch"], + "//conditions:default": ["@libtorch//:libtorch"], + }), +) + cc_test( name = "test_modules_as_engines", timeout = "long", @@ -89,6 +110,21 @@ cc_test( ], ) +cc_test( + name = "test_module_fallback", + srcs = ["test_module_fallback.cpp"], + data = [ + "//tests/modules:jit_models", + ], + deps = [ + "//tests/util", + "@googletest//:gtest_main", + ] + select({ + ":use_pre_cxx11_abi": ["@libtorch_pre_cxx11_abi//:libtorch"], + "//conditions:default": ["@libtorch//:libtorch"], + }), +) + cc_test( name = "test_collections", srcs = ["test_collections.cpp"], @@ -104,6 +140,17 @@ cc_test( }), ) +cc_test( + name = "test_compiled_modules", + srcs = ["test_compiled_modules.cpp"], + data = [ + "//tests/modules:jit_models", + ], + deps = [ + ":cpp_api_test", + ], +) + cc_test( name = "test_multi_gpu_serde", srcs = ["test_multi_gpu_serde.cpp"], diff --git a/tests/cpp/test_compiled_modules.cpp b/tests/cpp/test_compiled_modules.cpp new file mode 100644 index 0000000000..e1e923b47a --- /dev/null +++ b/tests/cpp/test_compiled_modules.cpp @@ -0,0 +1,60 @@ +#include "cpp_api_test.h" + +TEST_P(CppAPITests, CompiledModuleIsClose) { + std::vector jit_inputs_ivalues; + std::vector trt_inputs_ivalues; + std::vector shapes; + for (uint64_t i = 0; i < input_shapes.size(); i++) { + auto in = at::randint(5, input_shapes[i], {at::kCUDA}).to(input_types[i]); + jit_inputs_ivalues.push_back(in.clone()); + trt_inputs_ivalues.push_back(in.clone()); + auto in_spec = torch_tensorrt::Input(input_shapes[i]); + in_spec.dtype = input_types[i]; + shapes.push_back(in_spec); + std::cout << in_spec << std::endl; + } + + torch::jit::IValue jit_results_ivalues = torch_tensorrt::tests::util::RunModuleForward(mod, jit_inputs_ivalues); + std::vector jit_results; + if (jit_results_ivalues.isTuple()) { + auto tuple = jit_results_ivalues.toTuple(); + for (auto t : tuple->elements()) { + jit_results.push_back(t.toTensor()); + } + } else { + jit_results.push_back(jit_results_ivalues.toTensor()); + } + + auto spec = torch_tensorrt::ts::CompileSpec(shapes); + spec.truncate_long_and_double = true; + + auto trt_mod = torch_tensorrt::ts::compile(mod, spec); + torch::jit::IValue trt_results_ivalues = torch_tensorrt::tests::util::RunModuleForward(trt_mod, trt_inputs_ivalues); + std::vector trt_results; + if (trt_results_ivalues.isTuple()) { + auto tuple = trt_results_ivalues.toTuple(); + for (auto t : tuple->elements()) { + trt_results.push_back(t.toTensor()); + } + } else { + trt_results.push_back(trt_results_ivalues.toTensor()); + } + + for (size_t i = 0; i < trt_results.size(); i++) { + ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results[i], trt_results[i].reshape_as(jit_results[i]), 0.99)); + } +} + +#ifndef DISABLE_TEST_IN_CI + +INSTANTIATE_TEST_SUITE_P( + CompiledModuleForwardIsCloseSuite, + CppAPITests, + testing::Values( + PathAndInput({"tests/modules/resnet18_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 2e-5}), + PathAndInput({"tests/modules/mobilenet_v2_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 2e-5}), + PathAndInput({"tests/modules/efficientnet_b0_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 8e-3}), + PathAndInput({"tests/modules/bert_base_uncased_traced.jit.pt", {{1, 14}, {1, 14}}, {at::kInt, at::kInt}, 8e-2}), + PathAndInput({"tests/modules/vit_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 8e-2}))); + +#endif diff --git a/tests/cpp/test_module_fallback.cpp b/tests/cpp/test_module_fallback.cpp new file mode 100644 index 0000000000..bfdfc46b04 --- /dev/null +++ b/tests/cpp/test_module_fallback.cpp @@ -0,0 +1,74 @@ +#include +#include "gtest/gtest.h" +#include "tests/util/util.h" +#include "torch/script.h" +#include "torch_tensorrt/torch_tensorrt.h" + +#ifndef DISABLE_TEST_IN_CI + +TEST(CppAPITest, ResNetModuleFallbacksCorrectly) { + torch::jit::script::Module mod; + try { + mod = torch::jit::load("tests/modules/resnet18_scripted.jit.pt"); + } catch (const c10::Error& e) { + std::cerr << "error loading the model\n"; + ASSERT_TRUE(false); + } + + const std::vector> input_shapes = {{1, 3, 224, 224}}; + std::vector jit_inputs_ivalues; + std::vector trt_inputs_ivalues; + for (auto in_shape : input_shapes) { + auto in = at::randint(5, in_shape, {at::kCUDA}); + jit_inputs_ivalues.push_back(in.clone()); + trt_inputs_ivalues.push_back(in.clone()); + } + + torch_tensorrt::ts::CompileSpec cfg(input_shapes); + cfg.torch_executed_modules.push_back("torchvision.models.resnet.BasicBlock"); + + auto jit_results = mod.forward(jit_inputs_ivalues).toTensor(); + auto trt_mod = torch_tensorrt::ts::compile(mod, cfg); + auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor(); + ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results, 0.99)); +} + +TEST(CppAPITest, MobileNetModuleFallbacksCorrectlyWithOneEngine) { + torch::jit::script::Module mod; + try { + mod = torch::jit::load("tests/modules/mobilenet_v2_scripted.jit.pt"); + } catch (const c10::Error& e) { + std::cerr << "error loading the model\n"; + ASSERT_TRUE(false); + } + + const std::vector> input_shapes = {{1, 3, 224, 224}}; + std::vector jit_inputs_ivalues; + std::vector trt_inputs_ivalues; + for (auto in_shape : input_shapes) { + auto in = at::randint(5, in_shape, {at::kCUDA}); + jit_inputs_ivalues.push_back(in.clone()); + trt_inputs_ivalues.push_back(in.clone()); + } + + torch_tensorrt::ts::CompileSpec cfg(input_shapes); + cfg.min_block_size = 5; + cfg.torch_executed_modules.push_back("torchvision.models.mobilenetv2.ConvBNActivation"); + + auto jit_results = mod.forward(jit_inputs_ivalues).toTensor(); + auto trt_mod = torch_tensorrt::ts::compile(mod, cfg); + + auto g = trt_mod.get_method("forward").graph(); + auto nodes = g->block()->nodes(); + std::size_t trt_count = 0; + for (const auto n : nodes) { + if (n->kind().toQualString() == std::string("tensorrt::execute_engine")) { + trt_count++; + } + } + ASSERT_TRUE(trt_count == 1); + + auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor(); + ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results, 0.99)); +} +#endif diff --git a/tests/cpp/test_multiple_registered_engines.cpp b/tests/cpp/test_multiple_registered_engines.cpp new file mode 100644 index 0000000000..16ae4c8a66 --- /dev/null +++ b/tests/cpp/test_multiple_registered_engines.cpp @@ -0,0 +1,66 @@ +#include +#include "gtest/gtest.h" +#include "tests/util/util.h" +#include "torch/script.h" +#include "torch_tensorrt/torch_tensorrt.h" + +#ifndef DISABLE_TEST_IN_CI + +TEST(CppAPITest, CanRunMultipleEngines) { + torch::jit::script::Module mod1; + torch::jit::script::Module mod2; + try { + mod1 = torch::jit::load("tests/modules/resnet18_traced.jit.pt"); + mod2 = torch::jit::load("tests/modules/resnet18_traced.jit.pt"); + } catch (const c10::Error& e) { + std::cerr << "error loading the model\n"; + return; + } + + const std::vector> input_shapes = {{1, 3, 224, 224}}; + + std::vector jit1_inputs_ivalues; + std::vector trt1_inputs_ivalues; + for (auto in_shape : input_shapes) { + auto in = at::randint(5, in_shape, {at::kCUDA}); + jit1_inputs_ivalues.push_back(in.clone()); + trt1_inputs_ivalues.push_back(in.clone()); + } + + std::vector jit2_inputs_ivalues; + std::vector trt2_inputs_ivalues; + for (auto in_shape : input_shapes) { + auto in = at::randint(5, in_shape, {at::kCUDA}); + jit2_inputs_ivalues.push_back(in.clone()); + trt2_inputs_ivalues.push_back(in.clone()); + } + + torch::jit::IValue jit1_results_ivalues = torch_tensorrt::tests::util::RunModuleForward(mod1, jit1_inputs_ivalues); + std::vector jit1_results; + jit1_results.push_back(jit1_results_ivalues.toTensor()); + + torch::jit::IValue jit2_results_ivalues = torch_tensorrt::tests::util::RunModuleForward(mod2, jit2_inputs_ivalues); + std::vector jit2_results; + jit2_results.push_back(jit2_results_ivalues.toTensor()); + + auto trt_mod1 = torch_tensorrt::ts::compile(mod1, input_shapes); + torch::jit::IValue trt1_results_ivalues = + torch_tensorrt::tests::util::RunModuleForward(trt_mod1, trt1_inputs_ivalues); + std::vector trt1_results; + trt1_results.push_back(trt1_results_ivalues.toTensor()); + + auto trt_mod2 = torch_tensorrt::ts::compile(mod2, input_shapes); + torch::jit::IValue trt2_results_ivalues = + torch_tensorrt::tests::util::RunModuleForward(trt_mod2, trt2_inputs_ivalues); + std::vector trt2_results; + trt2_results.push_back(trt2_results_ivalues.toTensor()); + + for (size_t i = 0; i < trt1_results.size(); i++) { + ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit1_results[i], trt1_results[i].reshape_as(jit1_results[i]), 0.99)); + } + + for (size_t i = 0; i < trt2_results.size(); i++) { + ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit2_results[i], trt2_results[i].reshape_as(jit2_results[i]), 0.99)); + } +} +#endif From 8d8cbfd747a129367155f0c0695279077c448a23 Mon Sep 17 00:00:00 2001 From: Dheeraj Peri Date: Thu, 1 Sep 2022 13:32:11 -0700 Subject: [PATCH 09/12] chore: linter fixes Signed-off-by: Dheeraj Peri --- tests/cpp/test_compiled_modules.cpp | 3 ++- tests/cpp/test_multiple_registered_engines.cpp | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/cpp/test_compiled_modules.cpp b/tests/cpp/test_compiled_modules.cpp index e1e923b47a..3a81f0a531 100644 --- a/tests/cpp/test_compiled_modules.cpp +++ b/tests/cpp/test_compiled_modules.cpp @@ -41,7 +41,8 @@ TEST_P(CppAPITests, CompiledModuleIsClose) { } for (size_t i = 0; i < trt_results.size(); i++) { - ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results[i], trt_results[i].reshape_as(jit_results[i]), 0.99)); + ASSERT_TRUE( + torch_tensorrt::tests::util::cosineSimEqual(jit_results[i], trt_results[i].reshape_as(jit_results[i]), 0.99)); } } diff --git a/tests/cpp/test_multiple_registered_engines.cpp b/tests/cpp/test_multiple_registered_engines.cpp index 16ae4c8a66..658f59ca74 100644 --- a/tests/cpp/test_multiple_registered_engines.cpp +++ b/tests/cpp/test_multiple_registered_engines.cpp @@ -56,11 +56,13 @@ TEST(CppAPITest, CanRunMultipleEngines) { trt2_results.push_back(trt2_results_ivalues.toTensor()); for (size_t i = 0; i < trt1_results.size(); i++) { - ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit1_results[i], trt1_results[i].reshape_as(jit1_results[i]), 0.99)); + ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual( + jit1_results[i], trt1_results[i].reshape_as(jit1_results[i]), 0.99)); } for (size_t i = 0; i < trt2_results.size(); i++) { - ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit2_results[i], trt2_results[i].reshape_as(jit2_results[i]), 0.99)); + ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual( + jit2_results[i], trt2_results[i].reshape_as(jit2_results[i]), 0.99)); } } #endif From 13cc0248c321783aff3717c5c5179a6b0ee42913 Mon Sep 17 00:00:00 2001 From: Dheeraj Peri Date: Tue, 6 Sep 2022 11:52:00 -0700 Subject: [PATCH 10/12] chore: Deepcopy other objects Signed-off-by: Dheeraj Peri --- py/torch_tensorrt/ts/_compile_spec.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/py/torch_tensorrt/ts/_compile_spec.py b/py/torch_tensorrt/ts/_compile_spec.py index 8f24bc76ad..6f267978ec 100644 --- a/py/torch_tensorrt/ts/_compile_spec.py +++ b/py/torch_tensorrt/ts/_compile_spec.py @@ -226,7 +226,13 @@ def _parse_input_signature(input_signature: Any): def _parse_compile_spec(compile_spec_: Dict[str, Any]) -> _ts_C.CompileSpec: # TODO: Use deepcopy to support partial compilation of collections - compile_spec = compile_spec_ + compile_spec = {} + for k, v in compile_spec_.items(): + if k != "calibrator": + compile_spec[k] = deepcopy(v) + else: + compile_spec[k] = v + info = _ts_C.CompileSpec() if len(compile_spec["inputs"]) > 0: From 749048cabe83725bd9a0208afb04b39556adbe4d Mon Sep 17 00:00:00 2001 From: Dheeraj Peri Date: Tue, 6 Sep 2022 18:07:40 -0700 Subject: [PATCH 11/12] fix: Fix deepcopy issues of PTQ calibrators Signed-off-by: Dheeraj Peri --- noxfile.py | 2 -- py/torch_tensorrt/ptq.py | 14 ++++++++++---- py/torch_tensorrt/ts/_compile_spec.py | 8 +------- tests/py/ptq/test_ptq_dataloader_calibrator.py | 8 ++++---- 4 files changed, 15 insertions(+), 17 deletions(-) diff --git a/noxfile.py b/noxfile.py index 0f6235f612..eff8136fbb 100644 --- a/noxfile.py +++ b/noxfile.py @@ -342,8 +342,6 @@ def run_l2_trt_compatibility_tests(session): if not USE_HOST_DEPS: install_deps(session) install_torch_trt(session) - download_models(session) - train_model(session) run_trt_compatibility_tests(session) cleanup(session) diff --git a/py/torch_tensorrt/ptq.py b/py/torch_tensorrt/ptq.py index 326f35f942..670690b433 100644 --- a/py/torch_tensorrt/ptq.py +++ b/py/torch_tensorrt/ptq.py @@ -55,6 +55,11 @@ def write_calibration_cache(self, cache): else: return b"" +# deepcopy (which involves pickling) is performed on the compile_spec internally during compilation. +# We register this __reduce__ function for pickler to identity the calibrator object returned by DataLoaderCalibrator during deepcopy. +# This should be the object's local name relative to the module https://docs.python.org/3/library/pickle.html#object.__reduce__ +def __reduce__(self): + return self.__class__.__name__ class DataLoaderCalibrator(object): """ @@ -114,24 +119,25 @@ def __new__(cls, *args, **kwargs): "get_batch": get_cache_mode_batch if use_cache else get_batch, "read_calibration_cache": read_calibration_cache, "write_calibration_cache": write_calibration_cache, + "__reduce__": __reduce__ # used when you deepcopy the DataLoaderCalibrator object } # Using type metaclass to construct calibrator class based on algorithm type if algo_type == CalibrationAlgo.ENTROPY_CALIBRATION: return type( - "DataLoaderCalibrator", (_C.IInt8EntropyCalibrator,), attribute_mapping + "Int8EntropyCalibrator", (_C.IInt8EntropyCalibrator,), attribute_mapping )() elif algo_type == CalibrationAlgo.ENTROPY_CALIBRATION_2: return type( - "DataLoaderCalibrator", (_C.IInt8MinMaxCalibrator,), attribute_mapping + "Int8EntropyCalibrator2", (_C.IInt8EntropyCalibrator2,), attribute_mapping )() elif algo_type == CalibrationAlgo.LEGACY_CALIBRATION: return type( - "DataLoaderCalibrator", (_C.IInt8LegacyCalibrator,), attribute_mapping + "Int8LegacyCalibrator", (_C.IInt8LegacyCalibrator,), attribute_mapping )() elif algo_type == CalibrationAlgo.MINMAX_CALIBRATION: return type( - "DataLoaderCalibrator", (_C.IInt8MinMaxCalibrator,), attribute_mapping + "Int8MinMaxCalibrator", (_C.IInt8MinMaxCalibrator,), attribute_mapping )() else: log( diff --git a/py/torch_tensorrt/ts/_compile_spec.py b/py/torch_tensorrt/ts/_compile_spec.py index 6f267978ec..9616111caa 100644 --- a/py/torch_tensorrt/ts/_compile_spec.py +++ b/py/torch_tensorrt/ts/_compile_spec.py @@ -226,13 +226,7 @@ def _parse_input_signature(input_signature: Any): def _parse_compile_spec(compile_spec_: Dict[str, Any]) -> _ts_C.CompileSpec: # TODO: Use deepcopy to support partial compilation of collections - compile_spec = {} - for k, v in compile_spec_.items(): - if k != "calibrator": - compile_spec[k] = deepcopy(v) - else: - compile_spec[k] = v - + compile_spec = deepcopy(compile_spec_) info = _ts_C.CompileSpec() if len(compile_spec["inputs"]) > 0: diff --git a/tests/py/ptq/test_ptq_dataloader_calibrator.py b/tests/py/ptq/test_ptq_dataloader_calibrator.py index 2ee1fa5b08..79c19dadbf 100644 --- a/tests/py/ptq/test_ptq_dataloader_calibrator.py +++ b/tests/py/ptq/test_ptq_dataloader_calibrator.py @@ -81,9 +81,6 @@ def test_compile_script(self): device=torch.device("cuda:0"), ) - fp32_test_acc = compute_accuracy(self.testing_dataloader, self.model) - log(Level.Info, "[Pyt FP32] Test Acc: {:.2f}%".format(100 * fp32_test_acc)) - compile_spec = { "inputs": [torchtrt.Input([1, 3, 32, 32])], "enabled_precisions": {torch.float, torch.int8}, @@ -96,8 +93,11 @@ def test_compile_script(self): "allow_gpu_fallback": False, }, } - trt_mod = torchtrt.ts.compile(self.model, **compile_spec) + + fp32_test_acc = compute_accuracy(self.testing_dataloader, self.model) + log(Level.Info, "[Pyt FP32] Test Acc: {:.2f}%".format(100 * fp32_test_acc)) + int8_test_acc = compute_accuracy(self.testing_dataloader, trt_mod) log(Level.Info, "[TRT INT8] Test Acc: {:.2f}%".format(100 * int8_test_acc)) acc_diff = fp32_test_acc - int8_test_acc From af2076110f6ae4448b56d0d7eb7884d0bc1aef81 Mon Sep 17 00:00:00 2001 From: Dheeraj Peri Date: Tue, 6 Sep 2022 18:09:20 -0700 Subject: [PATCH 12/12] chore: linter fixes Signed-off-by: Dheeraj Peri --- py/torch_tensorrt/ptq.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/py/torch_tensorrt/ptq.py b/py/torch_tensorrt/ptq.py index 670690b433..e7f3411cd5 100644 --- a/py/torch_tensorrt/ptq.py +++ b/py/torch_tensorrt/ptq.py @@ -55,12 +55,14 @@ def write_calibration_cache(self, cache): else: return b"" + # deepcopy (which involves pickling) is performed on the compile_spec internally during compilation. # We register this __reduce__ function for pickler to identity the calibrator object returned by DataLoaderCalibrator during deepcopy. # This should be the object's local name relative to the module https://docs.python.org/3/library/pickle.html#object.__reduce__ def __reduce__(self): return self.__class__.__name__ + class DataLoaderCalibrator(object): """ Constructs a calibrator class in TensorRT and uses pytorch dataloader to load/preproces @@ -119,7 +121,7 @@ def __new__(cls, *args, **kwargs): "get_batch": get_cache_mode_batch if use_cache else get_batch, "read_calibration_cache": read_calibration_cache, "write_calibration_cache": write_calibration_cache, - "__reduce__": __reduce__ # used when you deepcopy the DataLoaderCalibrator object + "__reduce__": __reduce__, # used when you deepcopy the DataLoaderCalibrator object } # Using type metaclass to construct calibrator class based on algorithm type @@ -129,7 +131,9 @@ def __new__(cls, *args, **kwargs): )() elif algo_type == CalibrationAlgo.ENTROPY_CALIBRATION_2: return type( - "Int8EntropyCalibrator2", (_C.IInt8EntropyCalibrator2,), attribute_mapping + "Int8EntropyCalibrator2", + (_C.IInt8EntropyCalibrator2,), + attribute_mapping, )() elif algo_type == CalibrationAlgo.LEGACY_CALIBRATION: return type(