Skip to content

Commit 2b4265a

Browse files
zou000terrytangyuan
authored and committed
Add OSS support (#199)
1 parent 8d151f3 commit 2b4265a

File tree

19 files changed

+4602
-0
lines changed

19 files changed

+4602
-0
lines changed

WORKSPACE

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -300,3 +300,65 @@ http_archive(
300300
"http://github.com/libexpat/libexpat/archive/R_2_2_6.tar.gz",
301301
],
302302
)
303+
304+
http_archive(
305+
name = "libapr1",
306+
build_file = "//third_party:libapr1.BUILD",
307+
patch_args = ["-p1"],
308+
patches = [
309+
"//third_party:libapr1.patch",
310+
],
311+
sha256 = "1a0909a1146a214a6ab9de28902045461901baab4e0ee43797539ec05b6dbae0",
312+
strip_prefix = "apr-1.6.5",
313+
urls = [
314+
"https://github.com/apache/apr/archive/1.6.5.tar.gz",
315+
],
316+
)
317+
318+
http_archive(
319+
name = "libaprutil1",
320+
build_file = "//third_party:libaprutil1.BUILD",
321+
patch_args = ["-p1"],
322+
patches = [
323+
"//third_party:libaprutil1.patch",
324+
],
325+
sha256 = "4c9ae319cedc16890fc2776920e7d529672dda9c3a9a9abd53bd80c2071b39af",
326+
strip_prefix = "apr-util-1.6.1",
327+
urls = [
328+
"https://github.com/apache/apr-util/archive/1.6.1.tar.gz",
329+
],
330+
)
331+
332+
http_archive(
333+
name = "mxml",
334+
build_file = "//third_party:mxml.BUILD",
335+
patch_args = ["-p1"],
336+
patches = [
337+
"//third_party:mxml.patch",
338+
],
339+
sha256 = "4d850d15cdd4fdb9e82817eb069050d7575059a9a2729c82b23440e4445da199",
340+
strip_prefix = "mxml-2.12",
341+
urls = [
342+
"https://github.com/michaelrsweet/mxml/archive/v2.12.tar.gz",
343+
],
344+
)
345+
346+
http_archive(
347+
name = "minini",
348+
build_file = "//third_party:minini.BUILD",
349+
sha256 = "a97dd5ac6811af95c8f2aeaa6894b3113377e78ffd585363c6848745760d0152",
350+
strip_prefix = "minIni-1.0",
351+
urls = [
352+
"https://github.com/ElasticDL/minIni/archive/v1.0.tar.gz",
353+
],
354+
)
355+
356+
http_archive(
357+
name = "aliyun_oss_c_sdk",
358+
build_file = "//third_party:oss_c_sdk.BUILD",
359+
sha256 = "6450d3970578c794b23e9e1645440c6f42f63be3f82383097660db5cf2fba685",
360+
strip_prefix = "aliyun-oss-c-sdk-3.7.0",
361+
urls = [
362+
"https://github.com/aliyun/aliyun-oss-c-sdk/archive/3.7.0.tar.gz",
363+
],
364+
)

tensorflow_io/oss/BUILD

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
package(
2+
default_visibility = ["//visibility:public"],
3+
)
4+
5+
licenses(["notice"]) # Apache 2.0
6+
7+
cc_binary(
8+
name = "python/ops/_oss_ops.so",
9+
srcs = [
10+
"kernels/ossfs/oss_file_system.cc",
11+
"kernels/ossfs/oss_file_system.h",
12+
"ops/ossfs_ops.cc",
13+
],
14+
copts = [
15+
"-D_GLIBCXX_USE_CXX11_ABI=0",
16+
],
17+
linkshared = 1,
18+
deps = [
19+
"@aliyun_oss_c_sdk",
20+
"@local_config_tf//:libtensorflow_framework",
21+
"@local_config_tf//:tf_header_lib",
22+
"@minini",
23+
],
24+
)

tensorflow_io/oss/README.md

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# TensorFlow OSS Filesystem Extension
2+
3+
OSS is an object storage service provided by Alibaba Cloud, see [here](https://www.alibabacloud.com/product/oss) for more information about the service.
4+
5+
This module provides an extension that emulates a filesystem using the object storage service. The directory structures are encoded in object keys and file contents are stored in objects. The extension is implemented using [OSS C SDK](https://github.com/aliyun/aliyun-oss-c-sdk).
6+
7+
To use the extension, first save your OSS credentials in a file, in `INI` format:
8+
9+
```
10+
[OSSCredentials]
11+
host = cn-hangzhou.oss.aliyun-inc.com
12+
accessid = your_oss_access_id
13+
accesskey = your_oss_access_key
14+
```
15+
16+
Then set environment variable `OSS_CREDENTIALS` to the path of the file.
17+
18+
In Python code, import the extension's `ossfs_ops` module to use the extension with `gfile`. File and directory URIs should have the `oss://` prefix, followed by a bucket name, then the directory hierarchy.
19+
20+
```python
21+
import tensorflow_io.oss.python.ops.ossfs_ops
22+
from tensorflow.python.platform import gfile
23+
24+
gfile.MkDir('oss://your_bucket_name/test_dir')
25+
```
26+

27+
With the extension installed, OSS files can be used with Dataset Ops, etc., in the same fashion as other files.
28+
29+
```python
30+
dataset = tf.data.TextLineDataset(["oss://bucket_name/data_dir/file1"])
31+
```
32+
33+
## Test
34+
35+
File `tests/test_oss.py` contains basic filesystem functionality tests. See `README.md` in the root directory for more information about running tests. Besides `OSS_CREDENTIALS`, the tests also require an `OSS_FS_TEST_BUCKET` environment variable containing an accessible bucket name. Make sure both are set before running `pytest tests`. You can also run only the OSS test using `pytest tests/test_oss.py`.

tensorflow_io/oss/__init__.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
# ==============================================================================
15+
"""Alibaba OSS File System.
16+
17+
@@ossfs_ops
18+
"""
19+
20+
from __future__ import absolute_import
21+
from __future__ import division
22+
from __future__ import print_function
23+
24+
from tensorflow_io.oss.python.ops import ossfs_ops # pylint: disable=unused-import
25+
26+
from tensorflow.python.util.all_util import remove_undocumented
27+
28+
_allowed_symbols = [
29+
"ossfs_ops",
30+
]
31+
32+
remove_undocumented(__name__, allowed_exception_list=_allowed_symbols)

0 commit comments

Comments
 (0)