From d20e7741bb3fa8973bcfc7ea034206e3d10bc7b9 Mon Sep 17 00:00:00 2001 From: Rebekah Davis Date: Tue, 21 Oct 2025 16:03:41 -0400 Subject: [PATCH 1/5] WIP: Integration tests for copying across filesystems. --- tiledb/tests/test_vfs.py | 65 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) diff --git a/tiledb/tests/test_vfs.py b/tiledb/tests/test_vfs.py index 46e826d824..6e8922442f 100644 --- a/tiledb/tests/test_vfs.py +++ b/tiledb/tests/test_vfs.py @@ -4,13 +4,14 @@ import pickle import random import sys +import uuid import numpy as np import pytest import tiledb -from .common import DiskTestCase, rand_utf8 +from .common import DiskTestCase, create_vfs_dir, rand_utf8 class TestVFS(DiskTestCase): @@ -115,6 +116,68 @@ def test_copy(self): with self.assertRaises(tiledb.TileDBError): vfs.copy_dir(self.path("foo/baz"), self.path("do_not_exist/baz")) + # @pytest.mark.parametrize("src", ["file", "s3", "azure", "gcs"]) + # @pytest.mark.parametrize("dst", ["file", "s3", "azure", "gcs"]) + @pytest.mark.parametrize("src", ["file"]) + @pytest.mark.parametrize("dst", ["file"]) + def test_copy_across(self, src, dst): + # if src == dst: + # return + + vfs = tiledb.VFS() + + # print(src) + # print(dst) + + if not vfs.supports(src) or not vfs.supports(dst): + return + + # Setup + if src == "file": + src_sep: str = "://" if os.name == "nt" else ":///" + else: + src_sep: str = "://" + + src_dir = src + src_sep + "tiledb-" + str(random.randint(0, 10e10)) + + create_vfs_dir(src_dir) + # create_vfs_dir(src + "://") + # #create_vfs_dir(dst + "://tiledb-" + str(random.randint(0, 10e10))+ "/dir") + self.assertEqual(1, 1) + return + # create_vfs_dir(dst + "://") + # if src != "file": + # vfs.create_bucket(src + "://dir") + # if dst != "file": + # vfs.create_bucket(dst + "://dir") + + ## NOte: need "file:///"" for POSIX, need to use some test harness to determine filename + testfile = ":///testfile" + src_file = src + testfile + dst_file = dst + testfile + + contents = b"TileDB test copying across filesystems." + filelen = len(contents) + + vfs.touch(src_file) + self.assertTrue(vfs.is_file(src_file)) + vfs.write(src_file, contents) + self.assertEqual(vfs.read(src_file, 0, filelen), contents) + + vfs.copy_file(src_file, dst_file) + self.assertTrue(vfs.is_file(dst_file)) + self.assertEqual(vfs.read(dst_file, 0, filelen), contents) + + # Clean up + if src != "file": + vfs.remove_bucket(src) + else: + vfs.remove_dir(src) + if dst != "file": + vfs.remove_bucket(dst) + else: + vfs.remove_dir(src) + def test_write_read(self): vfs = tiledb.VFS() From 0e174d4585ba8104b7f516a214c45ea6a22d2130 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Tue, 21 Oct 2025 17:02:45 -0400 Subject: [PATCH 2/5] Add helper function to create vfs-compatible paths Fix VFS creation, writing, and reading steps --- tiledb/tests/common.py | 11 +++++++ tiledb/tests/test_vfs.py | 65 ++++++++++++++++------------------------ 2 files changed, 36 insertions(+), 40 deletions(-) diff --git a/tiledb/tests/common.py b/tiledb/tests/common.py index 087089b363..36ea1ab1fd 100644 --- a/tiledb/tests/common.py +++ b/tiledb/tests/common.py @@ -70,6 +70,17 @@ def create_vfs_dir(path): vfs.create_dir(path) +def vfs_path(scheme: str, prefix: str=None) -> str: + """Create a VFS-compatible path""" + if not prefix: + prefix: str = str(uuid.uuid4()) + if scheme == "file": + return f"{scheme}://{tempfile.mktemp(prefix=prefix)}" + if "/" not in prefix: + prefix: str = f"{uuid.uuid4()}/{prefix}" + return f"{scheme}://{prefix}" + + class DiskTestCase: """Helper class to store paths and associated allocation frames. This is both a cleanup step and a test of resource management. Some platforms will diff --git a/tiledb/tests/test_vfs.py b/tiledb/tests/test_vfs.py index 6e8922442f..84effcd060 100644 --- a/tiledb/tests/test_vfs.py +++ b/tiledb/tests/test_vfs.py @@ -4,14 +4,14 @@ import pickle import random import sys -import uuid +import tempfile import numpy as np import pytest import tiledb -from .common import DiskTestCase, create_vfs_dir, rand_utf8 +from .common import DiskTestCase, vfs_path, rand_utf8 class TestVFS(DiskTestCase): @@ -118,55 +118,40 @@ def test_copy(self): # @pytest.mark.parametrize("src", ["file", "s3", "azure", "gcs"]) # @pytest.mark.parametrize("dst", ["file", "s3", "azure", "gcs"]) + @pytest.mark.skipif( + sys.platform == "win32", + reason="VFS copy commands from core are not supported on Windows", + ) @pytest.mark.parametrize("src", ["file"]) @pytest.mark.parametrize("dst", ["file"]) - def test_copy_across(self, src, dst): + def test_copy_across(self, src: str, dst: str): # if src == dst: # return vfs = tiledb.VFS() - # print(src) - # print(dst) - if not vfs.supports(src) or not vfs.supports(dst): return - # Setup - if src == "file": - src_sep: str = "://" if os.name == "nt" else ":///" - else: - src_sep: str = "://" - - src_dir = src + src_sep + "tiledb-" + str(random.randint(0, 10e10)) - - create_vfs_dir(src_dir) - # create_vfs_dir(src + "://") - # #create_vfs_dir(dst + "://tiledb-" + str(random.randint(0, 10e10))+ "/dir") - self.assertEqual(1, 1) + srcdir: str = vfs_path(src, prefix="tiledb-copy-src") + vfs.create_dir(srcdir) + srcfile: str = f"{srcdir}/testfile" + vfs.touch(srcfile) + self.assertTrue(vfs.isfile(srcfile)) + contents: bytes = b"TileDB test copying across filesystems." + with vfs.open(srcfile, "wb") as handle: + handle.write(contents) + + with vfs.open(srcfile) as handle: + self.assertEqual(handle.read(), contents) + + dstdir: str = vfs_path(dst, prefix="tiledb-copy-dst") + vfs.create_dir(dstdir) + dstfile: str = f"{dstdir}/testfile" + vfs.copy_file(srcfile, dstfile) + with vfs.open(dstfile) as handle: + self.assertEqual(handle.read(), contents) return - # create_vfs_dir(dst + "://") - # if src != "file": - # vfs.create_bucket(src + "://dir") - # if dst != "file": - # vfs.create_bucket(dst + "://dir") - - ## NOte: need "file:///"" for POSIX, need to use some test harness to determine filename - testfile = ":///testfile" - src_file = src + testfile - dst_file = dst + testfile - - contents = b"TileDB test copying across filesystems." - filelen = len(contents) - - vfs.touch(src_file) - self.assertTrue(vfs.is_file(src_file)) - vfs.write(src_file, contents) - self.assertEqual(vfs.read(src_file, 0, filelen), contents) - - vfs.copy_file(src_file, dst_file) - self.assertTrue(vfs.is_file(dst_file)) - self.assertEqual(vfs.read(dst_file, 0, filelen), contents) # Clean up if src != "file": From 13ddc23057af9d36ced81abb157774944d07d70c Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Tue, 21 Oct 2025 17:07:12 -0400 Subject: [PATCH 3/5] Run `pre-commit` --- tiledb/tests/common.py | 2 +- tiledb/tests/test_vfs.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tiledb/tests/common.py b/tiledb/tests/common.py index 36ea1ab1fd..6ed850187d 100644 --- a/tiledb/tests/common.py +++ b/tiledb/tests/common.py @@ -70,7 +70,7 @@ def create_vfs_dir(path): vfs.create_dir(path) -def vfs_path(scheme: str, prefix: str=None) -> str: +def vfs_path(scheme: str, prefix: str = None) -> str: """Create a VFS-compatible path""" if not prefix: prefix: str = str(uuid.uuid4()) diff --git a/tiledb/tests/test_vfs.py b/tiledb/tests/test_vfs.py index 84effcd060..4b0ff8d176 100644 --- a/tiledb/tests/test_vfs.py +++ b/tiledb/tests/test_vfs.py @@ -11,7 +11,7 @@ import tiledb -from .common import DiskTestCase, vfs_path, rand_utf8 +from .common import DiskTestCase, rand_utf8, vfs_path class TestVFS(DiskTestCase): @@ -119,8 +119,8 @@ def test_copy(self): # @pytest.mark.parametrize("src", ["file", "s3", "azure", "gcs"]) # @pytest.mark.parametrize("dst", ["file", "s3", "azure", "gcs"]) @pytest.mark.skipif( - sys.platform == "win32", - reason="VFS copy commands from core are not supported on Windows", + sys.platform == "win32", + reason="VFS copy commands from core are not supported on Windows", ) @pytest.mark.parametrize("src", ["file"]) @pytest.mark.parametrize("dst", ["file"]) From d60c4e988f762b98ec492f168105a989a56bc295 Mon Sep 17 00:00:00 2001 From: Rebekah Davis Date: Wed, 22 Oct 2025 16:27:46 -0400 Subject: [PATCH 4/5] working file->file and file->s3 tests. --- tiledb/tests/common.py | 5 ++++- tiledb/tests/test_vfs.py | 44 ++++++++++++++++++++++++++++------------ 2 files changed, 35 insertions(+), 14 deletions(-) diff --git a/tiledb/tests/common.py b/tiledb/tests/common.py index 6ed850187d..8d1daa12e5 100644 --- a/tiledb/tests/common.py +++ b/tiledb/tests/common.py @@ -73,7 +73,10 @@ def create_vfs_dir(path): def vfs_path(scheme: str, prefix: str = None) -> str: """Create a VFS-compatible path""" if not prefix: - prefix: str = str(uuid.uuid4()) + if scheme == "s3": + prefix: str = "tiledb-" + str(random.randint(0, 10e10)) + else: + prefix: str = "tiledb-" + str(uuid.uuid4()) if scheme == "file": return f"{scheme}://{tempfile.mktemp(prefix=prefix)}" if "/" not in prefix: diff --git a/tiledb/tests/test_vfs.py b/tiledb/tests/test_vfs.py index 4b0ff8d176..4299b6f36a 100644 --- a/tiledb/tests/test_vfs.py +++ b/tiledb/tests/test_vfs.py @@ -13,6 +13,8 @@ from .common import DiskTestCase, rand_utf8, vfs_path +s3_bucket = os.getenv("S3_BUCKET") + class TestVFS(DiskTestCase): def test_supports(self): @@ -116,52 +118,68 @@ def test_copy(self): with self.assertRaises(tiledb.TileDBError): vfs.copy_dir(self.path("foo/baz"), self.path("do_not_exist/baz")) - # @pytest.mark.parametrize("src", ["file", "s3", "azure", "gcs"]) - # @pytest.mark.parametrize("dst", ["file", "s3", "azure", "gcs"]) @pytest.mark.skipif( sys.platform == "win32", - reason="VFS copy commands from core are not supported on Windows", + reason="Windows paths are difficult; Posix is sufficient for testing.", ) @pytest.mark.parametrize("src", ["file"]) @pytest.mark.parametrize("dst", ["file"]) + # @pytest.mark.parametrize("src", ["file", "s3", "azure", "gcs"]) + # @pytest.mark.parametrize("dst", ["file", "s3", "azure", "gcs"]) def test_copy_across(self, src: str, dst: str): + # Currently, skip if both FSes are the same. This is covered above. + # However, this test might ought to replace the above. # if src == dst: # return vfs = tiledb.VFS() + # Return if neither filesystem is supported. if not vfs.supports(src) or not vfs.supports(dst): return + # Create source file, and write to it. srcdir: str = vfs_path(src, prefix="tiledb-copy-src") - vfs.create_dir(srcdir) + if src == "file": + vfs.create_dir(srcdir) + else: + vfs.create_bucket(srcdir) srcfile: str = f"{srcdir}/testfile" vfs.touch(srcfile) self.assertTrue(vfs.isfile(srcfile)) contents: bytes = b"TileDB test copying across filesystems." with vfs.open(srcfile, "wb") as handle: handle.write(contents) - with vfs.open(srcfile) as handle: self.assertEqual(handle.read(), contents) + # Copy src -> dst and assert the file contents are unchanged. dstdir: str = vfs_path(dst, prefix="tiledb-copy-dst") - vfs.create_dir(dstdir) + if dst == "file": + vfs.create_dir(dstdir) + else: + vfs.create_bucket(dstdir) dstfile: str = f"{dstdir}/testfile" vfs.copy_file(srcfile, dstfile) with vfs.open(dstfile) as handle: self.assertEqual(handle.read(), contents) - return + + # Copy back dst -> src and assert the file contents are unchanged. + vfs.remove_file(srcfile) + self.assertFalse(vfs.isfile(srcfile)) + vfs.copy_file(dstfile, srcfile) + with vfs.open(srcfile) as handle: + self.assertEqual(handle.read(), contents) # Clean up - if src != "file": - vfs.remove_bucket(src) + if src == "file": + vfs.remove_dir(srcdir) else: - vfs.remove_dir(src) - if dst != "file": - vfs.remove_bucket(dst) + vfs.remove_bucket(srcdir) + if dst == "file": + vfs.remove_dir(dstdir) else: - vfs.remove_dir(src) + vfs.remove_bucket(dstdir) def test_write_read(self): vfs = tiledb.VFS() From 01b10ba7d3c3bdbc6bbb9df83e664935951d2a3d Mon Sep 17 00:00:00 2001 From: Rebekah Davis Date: Mon, 27 Oct 2025 16:40:44 -0400 Subject: [PATCH 5/5] Add test for copying across filesystems. --- tiledb/tests/common.py | 19 ++++++++--------- tiledb/tests/conftest.py | 33 +++++++++++++++++++++++++++++ tiledb/tests/test_vfs.py | 45 ++++++++++++++++++++++++---------------- 3 files changed, 69 insertions(+), 28 deletions(-) diff --git a/tiledb/tests/common.py b/tiledb/tests/common.py index 8d1daa12e5..3e97afca51 100644 --- a/tiledb/tests/common.py +++ b/tiledb/tests/common.py @@ -70,18 +70,17 @@ def create_vfs_dir(path): vfs.create_dir(path) -def vfs_path(scheme: str, prefix: str = None) -> str: - """Create a VFS-compatible path""" - if not prefix: - if scheme == "s3": - prefix: str = "tiledb-" + str(random.randint(0, 10e10)) - else: - prefix: str = "tiledb-" + str(uuid.uuid4()) +def vfs_path(scheme: str) -> str: + prefix = "tiledb-" + if scheme == "s3": + prefix += str(random.randint(0, 10000000000)) + else: + prefix += str(uuid.uuid4()) + if scheme == "file": return f"{scheme}://{tempfile.mktemp(prefix=prefix)}" - if "/" not in prefix: - prefix: str = f"{uuid.uuid4()}/{prefix}" - return f"{scheme}://{prefix}" + else: + return f"{scheme}://{prefix}" class DiskTestCase: diff --git a/tiledb/tests/conftest.py b/tiledb/tests/conftest.py index ecce429e50..a98247f0ed 100644 --- a/tiledb/tests/conftest.py +++ b/tiledb/tests/conftest.py @@ -72,3 +72,36 @@ def original_os_fork(): """Provides the original unpatched os.fork.""" if sys.platform != "win32": return os.fork + + +@pytest.fixture +def vfs_config() -> dict[str, str]: + config: dict[str, str] = {} + # Configure S3 + if os.getenv("AWS_ACCESS_KEY_ID") and os.getenv("AWS_SECRET_ACCESS_KEY"): + config["vfs.s3.aws_access_key_id"] = os.getenv("AWS_ACCESS_KEY_ID") + config["vfs.s3.aws_secret_access_key"] = os.getenv("AWS_SECRET_ACCESS_KEY") + if os.getenv("VFS_S3_USE_MINIO"): + config["vfs.s3.endpoint_override"] = "localhost:9999" + config["vfs.s3.scheme"] = "https" + config["vfs.s3.use_virtual_addressing"] = "false" + config["vfs.s3.verify_ssl"] = "false" + + # Configure Azure + if os.getenv("AZURE_BLOB_ENDPOINT"): + config["vfs.azure.blob_endpoint"] = os.getenv("AZURE_BLOB_ENDPOINT") + if os.getenv("AZURE_STORAGE_ACCOUNT_TOKEN"): + config["vfs.azure.storage_sas_token"] = os.getenv("AZURE_STORAGE_ACCOUNT_TOKEN") + elif os.getenv("AZURE_STORAGE_ACCOUNT_NAME") and os.getenv( + "AZURE_STORAGE_ACCOUNT_KEY" + ): + config["vfs.azure.storage_account_name"] = os.getenv( + "AZURE_STORAGE_ACCOUNT_NAME" + ) + config["vfs.azure.storage_account_key"] = os.getenv("AZURE_STORAGE_ACCOUNT_KEY") + + # Configure Google Cloud + if os.getenv("TILEDB_TEST_GCS_ENDPOINT"): + config["vfs.gcs.endpoint"] = os.getenv("TILEDB_TEST_GCS_ENDPOINT") + + return config diff --git a/tiledb/tests/test_vfs.py b/tiledb/tests/test_vfs.py index 4299b6f36a..262d099f04 100644 --- a/tiledb/tests/test_vfs.py +++ b/tiledb/tests/test_vfs.py @@ -118,32 +118,46 @@ def test_copy(self): with self.assertRaises(tiledb.TileDBError): vfs.copy_dir(self.path("foo/baz"), self.path("do_not_exist/baz")) + # Note: Azure tests are intermittently failing. + # Seemingly mostly azure->azure, but also test teardown. + # I think it's an Azurite bucket storage thing, actually. @pytest.mark.skipif( sys.platform == "win32", reason="Windows paths are difficult; Posix is sufficient for testing.", ) - @pytest.mark.parametrize("src", ["file"]) - @pytest.mark.parametrize("dst", ["file"]) - # @pytest.mark.parametrize("src", ["file", "s3", "azure", "gcs"]) - # @pytest.mark.parametrize("dst", ["file", "s3", "azure", "gcs"]) - def test_copy_across(self, src: str, dst: str): + @pytest.mark.parametrize("src", ["file", "s3", "azure", "gcs"]) + @pytest.mark.parametrize("dst", ["file", "s3", "azure", "gcs"]) + def test_copy_across(self, src: str, dst: str, vfs_config): # Currently, skip if both FSes are the same. This is covered above. # However, this test might ought to replace the above. - # if src == dst: - # return + if src == dst: + return - vfs = tiledb.VFS() + # Set configuration options + if (src == "s3" or dst == "s3") and not vfs_config.get( + "vfs.s3.aws_access_key_id" + ): + return + if (src == "azure" or dst == "azure") and not any( + x.startswith("vfs.azure") for x in vfs_config.keys() + ): + return + if (src == "gcs" or dst == "gcs") and not vfs_config.get("vfs.gcs.endpoint"): + return + vfs = tiledb.VFS(vfs_config) # Return if neither filesystem is supported. if not vfs.supports(src) or not vfs.supports(dst): return # Create source file, and write to it. - srcdir: str = vfs_path(src, prefix="tiledb-copy-src") + srcdir: str = vfs_path(src) if src == "file": vfs.create_dir(srcdir) + self.assertTrue(vfs.is_dir(srcdir)) else: vfs.create_bucket(srcdir) + self.assertTrue(vfs.is_bucket(srcdir)) srcfile: str = f"{srcdir}/testfile" vfs.touch(srcfile) self.assertTrue(vfs.isfile(srcfile)) @@ -154,23 +168,18 @@ def test_copy_across(self, src: str, dst: str): self.assertEqual(handle.read(), contents) # Copy src -> dst and assert the file contents are unchanged. - dstdir: str = vfs_path(dst, prefix="tiledb-copy-dst") + dstdir: str = vfs_path(dst) if dst == "file": vfs.create_dir(dstdir) + self.assertTrue(vfs.is_dir(dstdir)) else: vfs.create_bucket(dstdir) + self.assertTrue(vfs.is_bucket(dstdir)) dstfile: str = f"{dstdir}/testfile" - vfs.copy_file(srcfile, dstfile) + vfs.copy_dir(srcdir, dstdir) with vfs.open(dstfile) as handle: self.assertEqual(handle.read(), contents) - # Copy back dst -> src and assert the file contents are unchanged. - vfs.remove_file(srcfile) - self.assertFalse(vfs.isfile(srcfile)) - vfs.copy_file(dstfile, srcfile) - with vfs.open(srcfile) as handle: - self.assertEqual(handle.read(), contents) - # Clean up if src == "file": vfs.remove_dir(srcdir)