From bad20b81ed1c8babd598106cb078834ad6142145 Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung
Date: Sat, 13 Jul 2019 14:13:13 +0800
Subject: [PATCH 1/3] fix gpu test

---
 .../seq2seq/beam_search_ops_test.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/tensorflow_addons/seq2seq/beam_search_ops_test.py b/tensorflow_addons/seq2seq/beam_search_ops_test.py
index a8fd760d08..0e618e6894 100644
--- a/tensorflow_addons/seq2seq/beam_search_ops_test.py
+++ b/tensorflow_addons/seq2seq/beam_search_ops_test.py
@@ -71,9 +71,6 @@ def testBadParentValuesOnCPU(self):
             self.evaluate(beams)
 
     def testBadParentValuesOnGPU(self):
-        # TODO: Fix #348 issue
-        self.skipTest('Wait #348 to be fixed')
-
         # Only want to run this test on CUDA devices, as gather_tree is not
         # registered for SYCL devices.
         if not tf.test.is_gpu_available(cuda_only=True):
@@ -89,12 +86,14 @@ def testBadParentValuesOnGPU(self):
         expected_result = _transpose_batch_time([[[2, -1, 2], [6, 5, 6],
                                                   [7, 8, 9], [10, 10, 10]]])
         with tf.device("/device:GPU:0"):
-            beams = gather_tree(
-                step_ids=step_ids,
-                parent_ids=parent_ids,
-                max_sequence_lengths=max_sequence_lengths,
-                end_token=end_token)
-        self.assertAllEqual(expected_result, self.evaluate(beams))
+            msg = r"parent id -1 at \(batch, time, beam\) == \(0, 0, 1\)"
+            with self.assertRaisesOpError(msg):
+                beams = gather_tree(
+                    step_ids=step_ids,
+                    parent_ids=parent_ids,
+                    max_sequence_lengths=max_sequence_lengths,
+                    end_token=end_token)
+                self.assertAllEqual(expected_result, self.evaluate(beams))
 
     def testGatherTreeBatch(self):
         batch_size = 10

From c7513d4954fe1eea7de247b95e174bb11abd6e00 Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung
Date: Sun, 4 Aug 2019 10:29:04 +0800
Subject: [PATCH 2/3] build gpu kernel for beam_search_ops

---
 tensorflow_addons/custom_ops/seq2seq/BUILD    | 26 +++++++++++++++++--
 .../seq2seq/cc/kernels/beam_search_ops.h      |  2 +-
 .../cc/kernels/beam_search_ops_gpu.cu.cc      |  2 +-
 3 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/tensorflow_addons/custom_ops/seq2seq/BUILD b/tensorflow_addons/custom_ops/seq2seq/BUILD
index aac484b93d..4f95a02373 100644
--- a/tensorflow_addons/custom_ops/seq2seq/BUILD
+++ b/tensorflow_addons/custom_ops/seq2seq/BUILD
@@ -3,23 +3,45 @@ licenses(["notice"])  # Apache 2.0
 package(default_visibility = ["//visibility:public"])
 
 load("@local_config_tf//:build_defs.bzl", "D_GLIBCXX_USE_CXX11_ABI")
+load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda_is_configured", "if_cuda")
 
 cc_binary(
     name = "_beam_search_ops.so",
     srcs = [
         "cc/kernels/beam_search_ops.cc",
         "cc/kernels/beam_search_ops.h",
-        # "cc/kernels/beam_search_ops_gpu.cu.cc",
         "cc/ops/beam_search_ops.cc",
     ],
     copts = [
         "-pthread",
         "-std=c++11",
         D_GLIBCXX_USE_CXX11_ABI,
-    ],
+    ] + if_cuda(["-DGOOGLE_CUDA=1"]),
     linkshared = 1,
     deps = [
         "@local_config_tf//:libtensorflow_framework",
         "@local_config_tf//:tf_header_lib",
+    ] + if_cuda_is_configured([":beam_search_ops_gpu"]),
+)
+
+cc_library(
+    name = "beam_search_ops_gpu",
+    srcs = [
+        "cc/kernels/beam_search_ops.h",
+        "cc/kernels/beam_search_ops_gpu.cu.cc",
     ],
+    copts = if_cuda_is_configured([
+        "-DGOOGLE_CUDA=1",
+        "-x cuda",
+        "-nvcc_options=relaxed-constexpr",
+        "-nvcc_options=ftz=true",
+    ]),
+    deps = [
+        "@local_config_tf//:libtensorflow_framework",
+        "@local_config_tf//:tf_header_lib",
+    ] + if_cuda_is_configured([
+        "@local_config_cuda//cuda:cuda_libs",
+        "@local_config_cuda//cuda:cuda_headers",
+    ]),
+    alwayslink = 1,
 )
diff --git a/tensorflow_addons/custom_ops/seq2seq/cc/kernels/beam_search_ops.h b/tensorflow_addons/custom_ops/seq2seq/cc/kernels/beam_search_ops.h
index e809b6f985..f11bb6cd8f 100644
--- a/tensorflow_addons/custom_ops/seq2seq/cc/kernels/beam_search_ops.h
+++ b/tensorflow_addons/custom_ops/seq2seq/cc/kernels/beam_search_ops.h
@@ -37,4 +37,4 @@ struct GatherTree {
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // TENSORFLOW_CONTRIB_SEQ2SEQ_KERNELS_BEAM_SEARCH_OPS_H_
\ No newline at end of file
+#endif  // TENSORFLOW_CONTRIB_SEQ2SEQ_KERNELS_BEAM_SEARCH_OPS_H_
diff --git a/tensorflow_addons/custom_ops/seq2seq/cc/kernels/beam_search_ops_gpu.cu.cc b/tensorflow_addons/custom_ops/seq2seq/cc/kernels/beam_search_ops_gpu.cu.cc
index bff67438aa..b6018be293 100644
--- a/tensorflow_addons/custom_ops/seq2seq/cc/kernels/beam_search_ops_gpu.cu.cc
+++ b/tensorflow_addons/custom_ops/seq2seq/cc/kernels/beam_search_ops_gpu.cu.cc
@@ -18,7 +18,7 @@ limitations under the License.
 #define EIGEN_USE_GPU
 
 #include "tensorflow_addons/custom_ops/seq2seq/cc/kernels/beam_search_ops.h"
-#include "tensorflow/core/util/cuda_kernel_helper.h"
+#include "tensorflow/core/util/gpu_kernel_helper.h"
 
 namespace tensorflow {
 namespace functor {

From f006c9b098338870c84ea65668ca0a0e2adbae0d Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung
Date: Sun, 4 Aug 2019 10:29:48 +0800
Subject: [PATCH 3/3] remove assertion manager

---
 tensorflow_addons/seq2seq/beam_search_ops_test.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/tensorflow_addons/seq2seq/beam_search_ops_test.py b/tensorflow_addons/seq2seq/beam_search_ops_test.py
index 0e618e6894..14e7621e2c 100644
--- a/tensorflow_addons/seq2seq/beam_search_ops_test.py
+++ b/tensorflow_addons/seq2seq/beam_search_ops_test.py
@@ -86,14 +86,12 @@ def testBadParentValuesOnGPU(self):
         expected_result = _transpose_batch_time([[[2, -1, 2], [6, 5, 6],
                                                   [7, 8, 9], [10, 10, 10]]])
         with tf.device("/device:GPU:0"):
-            msg = r"parent id -1 at \(batch, time, beam\) == \(0, 0, 1\)"
-            with self.assertRaisesOpError(msg):
-                beams = gather_tree(
-                    step_ids=step_ids,
-                    parent_ids=parent_ids,
-                    max_sequence_lengths=max_sequence_lengths,
-                    end_token=end_token)
-                self.assertAllEqual(expected_result, self.evaluate(beams))
+            beams = gather_tree(
+                step_ids=step_ids,
+                parent_ids=parent_ids,
+                max_sequence_lengths=max_sequence_lengths,
+                end_token=end_token)
+        self.assertAllEqual(expected_result, self.evaluate(beams))
 
     def testGatherTreeBatch(self):
         batch_size = 10