From 473259308b41fe672e1a7b6173895b18a68d91a5 Mon Sep 17 00:00:00 2001 From: Parmeet Singh Bhatia Date: Tue, 31 May 2022 18:52:36 -0400 Subject: [PATCH 1/4] add header for bert tokenizer --- torchtext/csrc/bert_tokenizer.cpp | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/torchtext/csrc/bert_tokenizer.cpp b/torchtext/csrc/bert_tokenizer.cpp index 06aba8ae7c..2ee86c2a97 100644 --- a/torchtext/csrc/bert_tokenizer.cpp +++ b/torchtext/csrc/bert_tokenizer.cpp @@ -1,3 +1,25 @@ + + +/* Copyright (c) Meta Platforms, Inc. and affiliates. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + + +Original code is taken from https://github.com/LieluoboAi/radish/blob/master/radish/bert/bert_tokenizer.cc + +The code is modified and summary is provided in this PR https://github.com/pytorch/text/pull/1707 +*/ + #include #include From a6d54ff0fc01e0381ecf91ae7cc3d668b790508d Mon Sep 17 00:00:00 2001 From: Parmeet Singh Bhatia Date: Tue, 31 May 2022 18:54:09 -0400 Subject: [PATCH 2/4] minor change --- torchtext/csrc/bert_tokenizer.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/torchtext/csrc/bert_tokenizer.cpp b/torchtext/csrc/bert_tokenizer.cpp index 2ee86c2a97..f64ec370de 100644 --- a/torchtext/csrc/bert_tokenizer.cpp +++ b/torchtext/csrc/bert_tokenizer.cpp @@ -1,5 +1,3 @@ - - /* Copyright (c) Meta Platforms, Inc. and affiliates. 
Licensed under the Apache License, Version 2.0 (the "License"); @@ -20,6 +18,7 @@ Original code is taken from https://github.com/LieluoboAi/radish/blob/master/rad The code is modified and summary is provided in this PR https://github.com/pytorch/text/pull/1707 */ + #include #include From cd0c41784e56b780dc65156f5c7f291ce9c16a4a Mon Sep 17 00:00:00 2001 From: Parmeet Singh Bhatia Date: Tue, 31 May 2022 19:35:41 -0400 Subject: [PATCH 3/4] minor modification --- torchtext/csrc/bert_tokenizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchtext/csrc/bert_tokenizer.cpp b/torchtext/csrc/bert_tokenizer.cpp index f64ec370de..7c5e274c7c 100644 --- a/torchtext/csrc/bert_tokenizer.cpp +++ b/torchtext/csrc/bert_tokenizer.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) Meta Platforms, Inc. and affiliates. +/* Portions Copyright (c) Meta Platforms, Inc. and affiliates. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. From 2147d09f269cdb741903488c21603800e23e1aad Mon Sep 17 00:00:00 2001 From: Parmeet Singh Bhatia Date: Tue, 31 May 2022 19:43:11 -0400 Subject: [PATCH 4/4] fix lint --- torchtext/csrc/bert_tokenizer.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/torchtext/csrc/bert_tokenizer.cpp b/torchtext/csrc/bert_tokenizer.cpp index 7c5e274c7c..4bade1153a 100644 --- a/torchtext/csrc/bert_tokenizer.cpp +++ b/torchtext/csrc/bert_tokenizer.cpp @@ -13,12 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. 
-Original code is taken from https://github.com/LieluoboAi/radish/blob/master/radish/bert/bert_tokenizer.cc +Original code is taken from +https://github.com/LieluoboAi/radish/blob/master/radish/bert/bert_tokenizer.cc -The code is modified and summary is provided in this PR https://github.com/pytorch/text/pull/1707 +The code has been modified; a summary of the changes is provided in this PR +https://github.com/pytorch/text/pull/1707 */ - #include #include