From 5552aefb91b52c789d7a5a03690f69d2e3f6c8a6 Mon Sep 17 00:00:00 2001 From: Dbhasin1 Date: Mon, 21 Feb 2022 18:54:51 +0000 Subject: [PATCH 1/9] add eurosat --- .pre-commit-config.yaml | 8 +++ .../prototype/datasets/_builtin/__init__.py | 3 +- .../prototype/datasets/_builtin/eurosat.py | 51 +++++++++++++++++++ 3 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 torchvision/prototype/datasets/_builtin/eurosat.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index dccc1c1a9b2..c38139349c5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,12 +3,17 @@ repos: rev: v4.0.1 hooks: - id: check-docstring-first + language_version: python3 - id: check-toml + language_version: python3 - id: check-yaml exclude: packaging/.* + language_version: python3 - id: mixed-line-ending args: [--fix=lf] + language_version: python3 - id: end-of-file-fixer + language_version: python3 - repo: https://github.com/omnilib/ufmt rev: v1.3.2 @@ -17,14 +22,17 @@ repos: additional_dependencies: - black == 21.9b0 - usort == 0.6.4 + language_version: python3 - repo: https://gitlab.com/pycqa/flake8 rev: 3.9.2 hooks: - id: flake8 args: [--config=setup.cfg] + language_version: python3 - repo: https://github.com/PyCQA/pydocstyle rev: 6.1.1 hooks: - id: pydocstyle + language_version: python3 diff --git a/torchvision/prototype/datasets/_builtin/__init__.py b/torchvision/prototype/datasets/_builtin/__init__.py index 9fdfca904f5..734f4e1e8e2 100644 --- a/torchvision/prototype/datasets/_builtin/__init__.py +++ b/torchvision/prototype/datasets/_builtin/__init__.py @@ -5,10 +5,11 @@ from .coco import Coco from .cub200 import CUB200 from .dtd import DTD +from .eurosat import EuroSAT from .fer2013 import FER2013 from .gtsrb import GTSRB from .imagenet import ImageNet -from .mnist import MNIST, FashionMNIST, KMNIST, EMNIST, QMNIST +from .mnist import EMNIST, FashionMNIST, KMNIST, MNIST, QMNIST from .oxford_iiit_pet import OxfordIITPet from .pcam import PCAM from 
.sbd import SBD diff --git a/torchvision/prototype/datasets/_builtin/eurosat.py b/torchvision/prototype/datasets/_builtin/eurosat.py new file mode 100644 index 00000000000..cd423f6dc22 --- /dev/null +++ b/torchvision/prototype/datasets/_builtin/eurosat.py @@ -0,0 +1,51 @@ +from typing import Any, Dict, List, Tuple + +from torchdata.datapipes.iter import IterDataPipe, Mapper +from torchvision.prototype.datasets.utils import Dataset, DatasetConfig, DatasetInfo, HttpResource, OnlineResource +from torchvision.prototype.datasets.utils._internal import hint_sharding, hint_shuffling +from torchvision.prototype.features import EncodedImage, Label + + +class EuroSAT(Dataset): + def _make_info(self) -> DatasetInfo: + return DatasetInfo( + "EuroSAT", + homepage="https://github.com/phelber/eurosat", + categories=( + "AnnualCrop", + "Forest", + "HerbaceousVegetation", + "Highway", + "Industrial", "Pasture", + "PermanentCrop", + "Residential", + "River", + "SeaLake", + ), + ) + + def resources(self, config: DatasetConfig) -> List[OnlineResource]: + url_root = "https://madm.dfki.de/files/sentinel" + data = HttpResource( + f"{url_root}/EuroSAT.zip", + sha256="8ebea626349354c5328b142b96d0430e647051f26efc2dc974c843f25ecf70bd", + ) + return [data] + + def _prepare_sample(self, data: Tuple[str, Any]) -> Dict[str, Any]: + image_path = data[0] + category = image_path.split("/")[-2] + buffer = data[1] + return dict( + label=Label.from_category(category, categories=self.categories), + path=image_path, + image=EncodedImage.from_file(buffer), + ) + + def _make_datapipe( + self, resource_dps: List[IterDataPipe], *, config: DatasetConfig + ) -> IterDataPipe[Dict[str, Any]]: + images_dp = resource_dps[0] + images_dp = hint_sharding(images_dp) + images_dp = hint_shuffling(images_dp) + return Mapper(images_dp, self._prepare_sample) From a6b9ca6c2592ca87c9b4865a19998d4103540f59 Mon Sep 17 00:00:00 2001 From: Dbhasin1 Date: Tue, 22 Feb 2022 17:26:32 +0000 Subject: [PATCH 2/9] revert formatting --- 
torchvision/prototype/datasets/_builtin/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/torchvision/prototype/datasets/_builtin/__init__.py b/torchvision/prototype/datasets/_builtin/__init__.py index 734f4e1e8e2..9fdfca904f5 100644 --- a/torchvision/prototype/datasets/_builtin/__init__.py +++ b/torchvision/prototype/datasets/_builtin/__init__.py @@ -5,11 +5,10 @@ from .coco import Coco from .cub200 import CUB200 from .dtd import DTD -from .eurosat import EuroSAT from .fer2013 import FER2013 from .gtsrb import GTSRB from .imagenet import ImageNet -from .mnist import EMNIST, FashionMNIST, KMNIST, MNIST, QMNIST +from .mnist import MNIST, FashionMNIST, KMNIST, EMNIST, QMNIST from .oxford_iiit_pet import OxfordIITPet from .pcam import PCAM from .sbd import SBD From a0995e1e8632da4606f20022dcb54685dc267c51 Mon Sep 17 00:00:00 2001 From: Dbhasin1 Date: Tue, 22 Feb 2022 17:27:05 +0000 Subject: [PATCH 3/9] port test and make style changes --- test/builtin_dataset_mocks.py | 23 +++++++++++++++++-- .../prototype/datasets/_builtin/eurosat.py | 21 ++++++++--------- 2 files changed, 31 insertions(+), 13 deletions(-) diff --git a/test/builtin_dataset_mocks.py b/test/builtin_dataset_mocks.py index 123d8f29d3f..5a6f04e1f72 100644 --- a/test/builtin_dataset_mocks.py +++ b/test/builtin_dataset_mocks.py @@ -10,13 +10,13 @@ import pickle import random import xml.etree.ElementTree as ET -from collections import defaultdict, Counter +from collections import Counter, defaultdict import numpy as np import PIL.Image import pytest import torch -from datasets_utils import make_zip, make_tar, create_image_folder, create_image_file +from datasets_utils import create_image_file, create_image_folder, make_tar, make_zip from torch.nn.functional import one_hot from torch.testing import make_tensor as _make_tensor from torchvision.prototype.datasets._api import find @@ -1299,6 +1299,25 @@ def cub200(info, root, config): return num_samples_map[config.split] 
+@register_mock +def eurosat(info, root, config): + print("info", info) + data_folder = pathlib.Path(root, "eurosat", "2750") + data_folder.mkdir(data_folder) + + num_examples_per_class = 3 + classes = ("AnnualCrop", "Forest") + for cls in classes: + create_image_folder( + root=data_folder, + name=cls, + file_name_fn=lambda idx: f"{cls}_{idx}.jpg", + num_examples=num_examples_per_class, + ) + + return len(classes) * num_examples_per_class + + @register_mock def svhn(info, root, config): import scipy.io as sio diff --git a/torchvision/prototype/datasets/_builtin/eurosat.py b/torchvision/prototype/datasets/_builtin/eurosat.py index cd423f6dc22..e03e0b19582 100644 --- a/torchvision/prototype/datasets/_builtin/eurosat.py +++ b/torchvision/prototype/datasets/_builtin/eurosat.py @@ -1,3 +1,4 @@ +import pathlib from typing import Any, Dict, List, Tuple from torchdata.datapipes.iter import IterDataPipe, Mapper @@ -9,7 +10,7 @@ class EuroSAT(Dataset): def _make_info(self) -> DatasetInfo: return DatasetInfo( - "EuroSAT", + "eurosat", homepage="https://github.com/phelber/eurosat", categories=( "AnnualCrop", @@ -25,27 +26,25 @@ def _make_info(self) -> DatasetInfo: ) def resources(self, config: DatasetConfig) -> List[OnlineResource]: - url_root = "https://madm.dfki.de/files/sentinel" data = HttpResource( - f"{url_root}/EuroSAT.zip", + "https://madm.dfki.de/files/sentinel/EuroSAT.zip", sha256="8ebea626349354c5328b142b96d0430e647051f26efc2dc974c843f25ecf70bd", ) return [data] def _prepare_sample(self, data: Tuple[str, Any]) -> Dict[str, Any]: - image_path = data[0] - category = image_path.split("/")[-2] - buffer = data[1] + path, buffer = data + category = pathlib.Path(path).parent.name return dict( label=Label.from_category(category, categories=self.categories), - path=image_path, + path=path, image=EncodedImage.from_file(buffer), ) def _make_datapipe( self, resource_dps: List[IterDataPipe], *, config: DatasetConfig ) -> IterDataPipe[Dict[str, Any]]: - images_dp = 
resource_dps[0] - images_dp = hint_sharding(images_dp) - images_dp = hint_shuffling(images_dp) - return Mapper(images_dp, self._prepare_sample) + dp = resource_dps[0] + dp = hint_sharding(dp) + dp = hint_shuffling(dp) + return Mapper(dp, self._prepare_sample) From c0107b915f86436ee88a2e83e5fdc896dd9e2db6 Mon Sep 17 00:00:00 2001 From: Dbhasin1 Date: Tue, 22 Feb 2022 17:36:10 +0000 Subject: [PATCH 4/9] add eurosat to __init__ --- torchvision/prototype/datasets/_builtin/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/torchvision/prototype/datasets/_builtin/__init__.py b/torchvision/prototype/datasets/_builtin/__init__.py index 9fdfca904f5..44e27950214 100644 --- a/torchvision/prototype/datasets/_builtin/__init__.py +++ b/torchvision/prototype/datasets/_builtin/__init__.py @@ -5,6 +5,7 @@ from .coco import Coco from .cub200 import CUB200 from .dtd import DTD +from .eurosat import EuroSAT from .fer2013 import FER2013 from .gtsrb import GTSRB from .imagenet import ImageNet From c411098e9be97aadc0b8c998a3e06350841fa6d5 Mon Sep 17 00:00:00 2001 From: Dbhasin1 Date: Tue, 22 Feb 2022 22:41:23 +0000 Subject: [PATCH 5/9] fix pathlib error --- test/builtin_dataset_mocks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/builtin_dataset_mocks.py b/test/builtin_dataset_mocks.py index 5a6f04e1f72..0538ee9da04 100644 --- a/test/builtin_dataset_mocks.py +++ b/test/builtin_dataset_mocks.py @@ -1303,7 +1303,7 @@ def cub200(info, root, config): def eurosat(info, root, config): print("info", info) data_folder = pathlib.Path(root, "eurosat", "2750") - data_folder.mkdir(data_folder) + data_folder.mkdir(parents=True) num_examples_per_class = 3 classes = ("AnnualCrop", "Forest") From 1b0ef7c26786c3e5260971748f96481de92eec2d Mon Sep 17 00:00:00 2001 From: Dbhasin1 Date: Wed, 23 Feb 2022 13:12:11 +0000 Subject: [PATCH 6/9] create dataset zipfile and revert pre commit changes --- .pre-commit-config.yaml | 8 -------- test/builtin_dataset_mocks.py | 2 +- 2 
files changed, 1 insertion(+), 9 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c38139349c5..dccc1c1a9b2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,17 +3,12 @@ repos: rev: v4.0.1 hooks: - id: check-docstring-first - language_version: python3 - id: check-toml - language_version: python3 - id: check-yaml exclude: packaging/.* - language_version: python3 - id: mixed-line-ending args: [--fix=lf] - language_version: python3 - id: end-of-file-fixer - language_version: python3 - repo: https://github.com/omnilib/ufmt rev: v1.3.2 @@ -22,17 +17,14 @@ repos: additional_dependencies: - black == 21.9b0 - usort == 0.6.4 - language_version: python3 - repo: https://gitlab.com/pycqa/flake8 rev: 3.9.2 hooks: - id: flake8 args: [--config=setup.cfg] - language_version: python3 - repo: https://github.com/PyCQA/pydocstyle rev: 6.1.1 hooks: - id: pydocstyle - language_version: python3 diff --git a/test/builtin_dataset_mocks.py b/test/builtin_dataset_mocks.py index 0538ee9da04..964acd767f2 100644 --- a/test/builtin_dataset_mocks.py +++ b/test/builtin_dataset_mocks.py @@ -1314,7 +1314,7 @@ def eurosat(info, root, config): file_name_fn=lambda idx: f"{cls}_{idx}.jpg", num_examples=num_examples_per_class, ) - + make_zip(root, "EuroSAT.zip", data_folder) return len(classes) * num_examples_per_class From 91a1dd0f2c126dd85407fa0def8114780873e0a8 Mon Sep 17 00:00:00 2001 From: Dbhasin1 Date: Wed, 23 Feb 2022 13:18:15 +0000 Subject: [PATCH 7/9] remove unnecessary variable in resources --- torchvision/prototype/datasets/_builtin/eurosat.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/torchvision/prototype/datasets/_builtin/eurosat.py b/torchvision/prototype/datasets/_builtin/eurosat.py index e03e0b19582..fdbba077669 100644 --- a/torchvision/prototype/datasets/_builtin/eurosat.py +++ b/torchvision/prototype/datasets/_builtin/eurosat.py @@ -26,11 +26,12 @@ def _make_info(self) -> DatasetInfo: ) def 
resources(self, config: DatasetConfig) -> List[OnlineResource]: - data = HttpResource( - "https://madm.dfki.de/files/sentinel/EuroSAT.zip", - sha256="8ebea626349354c5328b142b96d0430e647051f26efc2dc974c843f25ecf70bd", - ) - return [data] + return [ + HttpResource( + "https://madm.dfki.de/files/sentinel/EuroSAT.zip", + sha256="8ebea626349354c5328b142b96d0430e647051f26efc2dc974c843f25ecf70bd", + ) + ] def _prepare_sample(self, data: Tuple[str, Any]) -> Dict[str, Any]: path, buffer = data From 04435011f907e9310adb8cde344f6b878a0497c7 Mon Sep 17 00:00:00 2001 From: Dbhasin1 Date: Tue, 1 Mar 2022 10:03:36 +0000 Subject: [PATCH 8/9] revert auto formatter changes and modify ufmt version --- CONTRIBUTING.md | 2 +- test/builtin_dataset_mocks.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e9b6204366c..c2d05b9000f 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -83,7 +83,7 @@ Instead of relying directly on `black` however, we rely on [ufmt](https://github.com/omnilib/ufmt), for compatibility reasons with Facebook internal infrastructure. 
-To format your code, install `ufmt` with `pip install ufmt` and use e.g.: +To format your code, install `ufmt` with `pip install ufmt==1.3.2 black==21.9b0 usort==0.6.4` and use e.g.: ```bash ufmt format torchvision diff --git a/test/builtin_dataset_mocks.py b/test/builtin_dataset_mocks.py index 964acd767f2..7c388d01f46 100644 --- a/test/builtin_dataset_mocks.py +++ b/test/builtin_dataset_mocks.py @@ -10,13 +10,13 @@ import pickle import random import xml.etree.ElementTree as ET -from collections import Counter, defaultdict +from collections import defaultdict, Counter import numpy as np import PIL.Image import pytest import torch -from datasets_utils import create_image_file, create_image_folder, make_tar, make_zip +from datasets_utils import make_zip, make_tar, create_image_folder, create_image_file from torch.nn.functional import one_hot from torch.testing import make_tensor as _make_tensor from torchvision.prototype.datasets._api import find @@ -1301,7 +1301,6 @@ def cub200(info, root, config): @register_mock def eurosat(info, root, config): - print("info", info) data_folder = pathlib.Path(root, "eurosat", "2750") data_folder.mkdir(parents=True) From 14f861083a07a5009261e8db446404b09f499473 Mon Sep 17 00:00:00 2001 From: Dbhasin1 Date: Wed, 2 Mar 2022 06:17:39 +0000 Subject: [PATCH 9/9] revert change to contributing guide --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c2d05b9000f..e9b6204366c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -83,7 +83,7 @@ Instead of relying directly on `black` however, we rely on [ufmt](https://github.com/omnilib/ufmt), for compatibility reasons with Facebook internal infrastructure. -To format your code, install `ufmt` with `pip install ufmt==1.3.2 black==21.9b0 usort==0.6.4` and use e.g.: +To format your code, install `ufmt` with `pip install ufmt` and use e.g.: ```bash ufmt format torchvision