diff --git a/torchtext/csrc/register_pybindings.cpp b/torchtext/csrc/register_pybindings.cpp index 5f0a6d0483..afa4708cdd 100644 --- a/torchtext/csrc/register_pybindings.cpp +++ b/torchtext/csrc/register_pybindings.cpp @@ -179,7 +179,16 @@ PYBIND11_MODULE(_torchtext, m) { .def_property_readonly("byte_encoder_", &GPT2BPEEncoder::GetByteEncoder) .def("encode", &GPT2BPEEncoder::Encode) .def("tokenize", &GPT2BPEEncoder::Tokenize) - .def("decode", &GPT2BPEEncoder::Decode) + .def( + "decode", + [](const c10::intrusive_ptr& self, + const std::vector& tokens) { + std::string s = self->Decode(tokens); + PyObject* py_obj = + PyUnicode_DecodeUTF8(s.data(), s.length(), "ignore"); + py::str py_s = py::reinterpret_steal(py_obj); + return py_s; + }) .def( "add_special_tokens", [](const c10::intrusive_ptr& self,