From fd3490dad64c24c4bfad5639e16268db9e86d728 Mon Sep 17 00:00:00 2001
From: Helin Wang
Date: Fri, 20 Sep 2019 14:46:45 -0700
Subject: [PATCH] Implement Addons>ParseTime operator.

The parse time operator parses an input string according to the provided
format string into a Unix time, the number of seconds / milliseconds /
microseconds / nanoseconds elapsed since January 1, 1970 UTC.

Fixes: https://github.com/tensorflow/addons/issues/492
---
 tensorflow_addons/custom_ops/text/BUILD       |  18 +++
 .../text/cc/kernels/parse_time_kernel.cc      | 109 ++++++++++++++++++
 .../custom_ops/text/cc/ops/parse_time_op.cc   |  71 ++++++++++++
 tensorflow_addons/text/BUILD                  |  15 +++
 tensorflow_addons/text/__init__.py            |   5 +-
 tensorflow_addons/text/parse_time_op.py       |  86 +++++++++++++++
 tensorflow_addons/text/parse_time_op_test.py  |  79 ++++++++++++++
 7 files changed, 382 insertions(+), 1 deletion(-)
 create mode 100644 tensorflow_addons/custom_ops/text/cc/kernels/parse_time_kernel.cc
 create mode 100644 tensorflow_addons/custom_ops/text/cc/ops/parse_time_op.cc
 create mode 100644 tensorflow_addons/text/parse_time_op.py
 create mode 100644 tensorflow_addons/text/parse_time_op_test.py

diff --git a/tensorflow_addons/custom_ops/text/BUILD b/tensorflow_addons/custom_ops/text/BUILD
index bb2902c557..3f2a03f3db 100644
--- a/tensorflow_addons/custom_ops/text/BUILD
+++ b/tensorflow_addons/custom_ops/text/BUILD
@@ -21,3 +21,21 @@ cc_binary(
         "@local_config_tf//:tf_header_lib",
     ],
 )
+
+cc_binary(
+    name = "_parse_time_op.so",
+    srcs = [
+        "cc/kernels/parse_time_kernel.cc",
+        "cc/ops/parse_time_op.cc",
+    ],
+    copts = [
+        "-pthread",
+        "-std=c++11",
+        D_GLIBCXX_USE_CXX11_ABI,
+    ],
+    linkshared = 1,
+    deps = [
+        "@local_config_tf//:libtensorflow_framework",
+        "@local_config_tf//:tf_header_lib",
+    ],
+)
diff --git a/tensorflow_addons/custom_ops/text/cc/kernels/parse_time_kernel.cc b/tensorflow_addons/custom_ops/text/cc/kernels/parse_time_kernel.cc
new file mode 100644
index 0000000000..d0dad300de
--- /dev/null
+++ b/tensorflow_addons/custom_ops/text/cc/kernels/parse_time_kernel.cc
@@ -0,0 +1,109 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <string>
+
+#include "absl/time/time.h"
+#include "tensorflow/core/framework/op_kernel.h"
+
+namespace tensorflow {
+namespace addons {
+
+using ::tensorflow::OpKernel;
+using ::tensorflow::OpKernelConstruction;
+using ::tensorflow::OpKernelContext;
+using ::tensorflow::Tensor;
+using ::tensorflow::tstring;
+
+// Unit of the Unix timestamp emitted by the op.
+enum OutputUnit {
+  SECOND = 1,
+  MILLISECOND = 2,
+  MICROSECOND = 3,
+  NANOSECOND = 4,
+};
+
+// Translates the "output_unit" attr value into an OutputUnit. Returns false
+// and leaves *output_unit unmodified if the string names no known unit.
+bool OutputUnitFromString(string output_unit_str, OutputUnit* output_unit) {
+  if (output_unit_str == "SECOND") {
+    *output_unit = SECOND;
+  } else if (output_unit_str == "MILLISECOND") {
+    *output_unit = MILLISECOND;
+  } else if (output_unit_str == "MICROSECOND") {
+    *output_unit = MICROSECOND;
+  } else if (output_unit_str == "NANOSECOND") {
+    *output_unit = NANOSECOND;
+  } else {
+    return false;
+  }
+  return true;
+}
+
+// CPU kernel: parses each input string with absl::ParseTime using the
+// "time_format" attr and writes the Unix time in the "output_unit" unit.
+class ParseTimeOp : public OpKernel {
+ public:
+  explicit ParseTimeOp(OpKernelConstruction* context) : OpKernel(context) {
+    string output_unit_str;
+    OP_REQUIRES_OK(context, context->GetAttr("time_format", &time_format_));
+    OP_REQUIRES_OK(context, context->GetAttr("output_unit", &output_unit_str));
+    OP_REQUIRES(context, OutputUnitFromString(output_unit_str, &output_unit_),
+                errors::InvalidArgument("Invalid output unit"));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    const Tensor& input_tensor = context->input(0);
+    auto input = input_tensor.flat<tstring>();
+
+    Tensor* output_tensor = nullptr;
+    OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(),
+                                                     &output_tensor));
+
+    auto output_flat = output_tensor->flat<int64>();
+    // 64-bit index: the element count of a tensor may exceed the range of int.
+    const int64 n = input.size();
+    for (int64 i = 0; i < n; ++i) {
+      absl::Time time;
+      std::string err;
+      OP_REQUIRES(context, absl::ParseTime(time_format_, input(i), &time, &err),
+                  errors::InvalidArgument("Parse time failed: ", err));
+      switch (output_unit_) {
+        case SECOND:
+          output_flat(i) = absl::ToUnixSeconds(time);
+          break;
+        case MILLISECOND:
+          output_flat(i) = absl::ToUnixMillis(time);
+          break;
+        case MICROSECOND:
+          output_flat(i) = absl::ToUnixMicros(time);
+          break;
+        case NANOSECOND:
+          output_flat(i) = absl::ToUnixNanos(time);
+          break;
+      }
+    }
+  }
+
+ private:
+  std::string time_format_;
+  OutputUnit output_unit_;
+};
+
+REGISTER_KERNEL_BUILDER(Name("Addons>ParseTime").Device(tensorflow::DEVICE_CPU),
+                        ParseTimeOp);
+
+}  // end namespace addons
+}  // end namespace tensorflow
diff --git a/tensorflow_addons/custom_ops/text/cc/ops/parse_time_op.cc b/tensorflow_addons/custom_ops/text/cc/ops/parse_time_op.cc
new file mode 100644
index 0000000000..7dca1ac596
--- /dev/null
+++ b/tensorflow_addons/custom_ops/text/cc/ops/parse_time_op.cc
@@ -0,0 +1,71 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+
+namespace tensorflow {
+namespace addons {
+REGISTER_OP("Addons>ParseTime")
+    .Input("time_string: string")
+    .Output("time_int64: int64")
+    .Attr("time_format: string")
+    .Attr("output_unit: {'SECOND', 'MILLISECOND', 'MICROSECOND', 'NANOSECOND'}")
+    .SetShapeFn(tensorflow::shape_inference::UnchangedShape)
+    .Doc(R"doc(
+Parse an input string according to the provided format string into a Unix time,
+the number of seconds / milliseconds / microseconds / nanoseconds elapsed since
+January 1, 1970 UTC.
+
+Uses strftime()-like formatting options, with the same extensions as
+FormatTime(), but with the exceptions that %E#S is interpreted as %E*S, and %E#f
+as %E*f. %Ez and %E*z also accept the same inputs.
+
+%Y consumes as many numeric characters as it can, so the matching data should
+always be terminated with a non-numeric. %E4Y always consumes exactly four
+characters, including any sign.
+
+Unspecified fields are taken from the default date and time of ...
+
+  "1970-01-01 00:00:00.0 +0000"
+
+For example, parsing a string of "15:45" (%H:%M) will return an Unix time that
+represents "1970-01-01 15:45:00.0 +0000".
+
+Note that ParseTime only heeds the fields year, month, day, hour, minute,
+(fractional) second, and UTC offset. Other fields, like weekday (%a or %A),
+while parsed for syntactic validity, are ignored in the conversion.
+
+Date and time fields that are out-of-range will be treated as errors rather than
+normalizing them like `absl::CivilSecond` does. For example, it is an error to
+parse the date "Oct 32, 2013" because 32 is out of range.
+
+A leap second of ":60" is normalized to ":00" of the following minute with
+fractional seconds discarded. The following table shows how the given seconds
+and subseconds will be parsed:
+
+  "59.x" -> 59.x  // exact
+  "60.x" -> 00.0  // normalized
+  "00.x" -> 00.x  // exact
+
+time_string: the input time string to be parsed.
+time_format: the time format.
+time_int64: the number of seconds / milliseconds / microseconds / nanoseconds
+  elapsed since January 1, 1970 UTC.
+output_unit: the output unit of the parsed unix time. Can only be SECOND,
+  MILLISECOND, MICROSECOND, NANOSECOND.
+)doc");
+}  // end namespace addons
+}  // end namespace tensorflow
diff --git a/tensorflow_addons/text/BUILD b/tensorflow_addons/text/BUILD
index 21306ef3f9..fbeafe16f7 100644
--- a/tensorflow_addons/text/BUILD
+++ b/tensorflow_addons/text/BUILD
@@ -7,9 +7,11 @@ py_library(
     srcs = ([
         "__init__.py",
         "crf.py",
+        "parse_time_op.py",
         "skip_gram_ops.py",
     ]),
     data = [
+        "//tensorflow_addons/custom_ops/text:_parse_time_op.so",
         "//tensorflow_addons/custom_ops/text:_skip_gram_ops.so",
         "//tensorflow_addons/utils",
     ],
@@ -41,3 +43,16 @@ py_test(
         ":text",
     ],
 )
+
+py_test(
+    name = "parse_time_op_test",
+    size = "small",
+    srcs = [
+        "parse_time_op_test.py",
+    ],
+    main = "parse_time_op_test.py",
+    srcs_version = "PY2AND3",
+    deps = [
+        ":text",
+    ],
+)
diff --git a/tensorflow_addons/text/__init__.py b/tensorflow_addons/text/__init__.py
index 11f8f9fecb..c61736f41d 100644
--- a/tensorflow_addons/text/__init__.py
+++ b/tensorflow_addons/text/__init__.py
@@ -32,4 +32,7 @@
 
 # Skip Gram Sampling
 from tensorflow_addons.text.skip_gram_ops import skip_gram_sample
-from tensorflow_addons.text.skip_gram_ops import skip_gram_sample_with_text_vocab
\ No newline at end of file
+from tensorflow_addons.text.skip_gram_ops import skip_gram_sample_with_text_vocab
+
+# Parse Time
+from tensorflow_addons.text.parse_time_op import parse_time
diff --git a/tensorflow_addons/text/parse_time_op.py b/tensorflow_addons/text/parse_time_op.py
new file mode 100644
index 0000000000..3369c83200
--- /dev/null
+++ b/tensorflow_addons/text/parse_time_op.py
@@ -0,0 +1,86 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Parse time ops."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+
+from tensorflow_addons.utils.resource_loader import get_path_to_datafile
+
+_parse_time_op = tf.load_op_library(
+    get_path_to_datafile("custom_ops/text/_parse_time_op.so"))
+
+tf.no_gradient("Addons>ParseTime")
+
+
+def parse_time(time_string, time_format, output_unit):
+    """Parse an input string according to the provided format string into a
+    Unix time.
+
+    Parse an input string according to the provided format string into a Unix
+    time, the number of seconds / milliseconds / microseconds / nanoseconds
+    elapsed since January 1, 1970 UTC.
+
+    Uses strftime()-like formatting options, with the same extensions as
+    FormatTime(), but with the exceptions that %E#S is interpreted as %E*S, and
+    %E#f as %E*f. %Ez and %E*z also accept the same inputs.
+
+    %Y consumes as many numeric characters as it can, so the matching
+    data should always be terminated with a non-numeric. %E4Y always
+    consumes exactly four characters, including any sign.
+
+    Unspecified fields are taken from the default date and time of ...
+
+      "1970-01-01 00:00:00.0 +0000"
+
+    For example, parsing a string of "15:45" (%H:%M) will return an
+    Unix time that represents "1970-01-01 15:45:00.0 +0000".
+
+    Note that ParseTime only heeds the fields year, month, day, hour,
+    minute, (fractional) second, and UTC offset. Other fields, like
+    weekday (%a or %A), while parsed for syntactic validity, are
+    ignored in the conversion.
+
+    Date and time fields that are out-of-range will be treated as
+    errors rather than normalizing them like `absl::CivilSecond` does.
+    For example, it is an error to parse the date "Oct 32, 2013"
+    because 32 is out of range.
+
+    A leap second of ":60" is normalized to ":00" of the following
+    minute with fractional seconds discarded. The following table
+    shows how the given seconds and subseconds will be parsed:
+
+      "59.x" -> 59.x  // exact
+      "60.x" -> 00.0  // normalized
+      "00.x" -> 00.x  // exact
+
+    Args:
+      time_string: The input time string to be parsed.
+      time_format: The time format.
+      output_unit: The output unit of the parsed unix time. Can only be SECOND,
+        MILLISECOND, MICROSECOND, NANOSECOND.
+
+    Returns:
+      the number of seconds / milliseconds / microseconds / nanoseconds elapsed
+      since January 1, 1970 UTC.
+
+    Raises:
+      ValueError: If `output_unit` is not a valid value.
+      tf.errors.InvalidArgumentError: If parsing `time_string` failed.
+    """
+    return _parse_time_op.addons_parse_time(time_string, time_format,
+                                            output_unit)
diff --git a/tensorflow_addons/text/parse_time_op_test.py b/tensorflow_addons/text/parse_time_op_test.py
new file mode 100644
index 0000000000..46fd6458d1
--- /dev/null
+++ b/tensorflow_addons/text/parse_time_op_test.py
@@ -0,0 +1,79 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Parse time op tests."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+
+from tensorflow_addons import text
+from tensorflow_addons.utils import test_utils
+
+
+@test_utils.run_all_in_graph_and_eager_modes
+class ParseTimeTest(tf.test.TestCase):
+    def test_parse_time(self):
+        time_format = "%Y-%m-%dT%H:%M:%E*S%Ez"
+        items = [
+            ("2019-05-17T23:56:09.05Z", time_format, "NANOSECOND",
+             1558137369050000000),
+            ("2019-05-17T23:56:09.05Z", time_format, "MICROSECOND",
+             1558137369050000),
+            ("2019-05-17T23:56:09.05Z", time_format, "MILLISECOND",
+             1558137369050),
+            ("2019-05-17T23:56:09.05Z", time_format, "SECOND", 1558137369),
+            ([
+                "2019-05-17T23:56:09.05Z", "2019-05-20T11:22:33.44Z",
+                "2019-05-30T22:33:44.55Z"
+            ], time_format, "MILLISECOND",
+             [1558137369050, 1558351353440, 1559255624550]),
+        ]
+        for time_string, time_format, output_unit, expected in items:
+            result = self.evaluate(
+                text.parse_time(
+                    time_string=time_string,
+                    time_format=time_format,
+                    output_unit=output_unit))
+            self.assertAllEqual(expected, result)
+
+    def test_invalid_output_unit(self):
+        errors = (ValueError, tf.errors.InvalidArgumentError)
+        with self.assertRaises(errors):
+            text.parse_time(
+                time_string="2019-05-17T23:56:09.05Z",
+                time_format="%Y-%m-%dT%H:%M:%E*S%Ez",
+                output_unit="INVALID")
+
+    def test_invalid_time_format(self):
+        with self.assertRaises(tf.errors.InvalidArgumentError):
+            self.evaluate(
+                text.parse_time(
+                    time_string="2019-05-17T23:56:09.05Z",
+                    time_format="INVALID",
+                    output_unit="SECOND"))
+
+    def test_invalid_time_string(self):
+        with self.assertRaises(tf.errors.InvalidArgumentError):
+            self.evaluate(
+                text.parse_time(
+                    time_string="INVALID",
+                    time_format="%Y-%m-%dT%H:%M:%E*S%Ez",
+                    output_unit="SECOND"))
+
+
+if __name__ == "__main__":
+    tf.test.main()