From 16b46c801b5f7bb90f5b189b3f0deff3f1db7b1c Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 3 Apr 2023 10:07:04 +0200 Subject: [PATCH 1/4] benchmark ssdlite detection pipeline --- datasets.py | 95 +++++++++- main.py | 10 +- results/20230331121654.log | 257 -------------------------- results/20230403073901.log | 363 +++++++++++++++++++++++++++++++++++++ tasks.py | 10 +- transforms.py | 301 +++++++++++++++++++++++++++++- 6 files changed, 769 insertions(+), 267 deletions(-) delete mode 100644 results/20230331121654.log create mode 100644 results/20230403073901.log diff --git a/datasets.py b/datasets.py index 00bd36f..b4e3a7c 100644 --- a/datasets.py +++ b/datasets.py @@ -1,13 +1,102 @@ -import torch +import pathlib + +from torch.hub import tqdm +from torchvision import datasets from torchvision.transforms import functional as F_v1 +COCO_ROOT = "~/datasets/coco" + +__all__ = ["classification_dataset_builder", "detection_dataset_builder"] -def classification_dataset_builder(*, input_type, api_version, rng, num_samples): + +def classification_dataset_builder(*, api_version, rng, num_samples): return [ F_v1.to_pil_image( # average size of images in ImageNet - torch.randint(0, 256, (3, 469, 387), dtype=torch.uint8, generator=rng) + torch.randint(0, 256, (3, 469, 387), dtype=torch.uint8, generator=rng), ) for _ in range(num_samples) ] + + +def detection_dataset_builder(*, api_version, rng, num_samples): + root = pathlib.Path(COCO_ROOT).expanduser().resolve() + image_folder = str(root / "train2017") + annotation_file = str(root / "annotations" / "instances_train2017.json") + if api_version == "v1": + dataset = CocoDetectionV1(image_folder, annotation_file, transforms=None) + elif api_version == "v2": + dataset = datasets.CocoDetection(image_folder, annotation_file) + else: + raise ValueError(f"Got {api_version=}") + + dataset = _coco_remove_images_without_annotations(dataset) + + idcs = torch.randperm(len(dataset), generator=rng)[:num_samples] + print(f"Caching {num_samples} COCO samples") + return [dataset[idx] for idx in tqdm(idcs.tolist())] + + +# everything below is copy-pasted from +# https://github.com/pytorch/vision/blob/main/references/detection/coco_utils.py + +import torch +import torchvision + + +class CocoDetectionV1(torchvision.datasets.CocoDetection): + def __init__(self, img_folder, ann_file, transforms): + super().__init__(img_folder, ann_file) + self._transforms = transforms + + def __getitem__(self, idx): + img, target = super().__getitem__(idx) + image_id = self.ids[idx] + target = dict(image_id=image_id, annotations=target) + if self._transforms is not None: + img, target = self._transforms(img, target) + return img, target + + +def _coco_remove_images_without_annotations(dataset, cat_list=None): + def _has_only_empty_bbox(anno): + return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno) + + def _count_visible_keypoints(anno): + return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno) + + min_keypoints_per_image = 10 + + def _has_valid_annotation(anno): + # if it's empty, there is no annotation + if len(anno) == 0: + return False + # if all boxes have close to zero area, there is no annotation + if _has_only_empty_bbox(anno): + return False + # keypoints task have a slight different criteria for considering + # if an annotation is valid + if "keypoints" not in anno[0]: + return True + # for keypoint detection tasks, only consider valid images those + # containing at least min_keypoints_per_image + if _count_visible_keypoints(anno) >= 
min_keypoints_per_image: + return True + return False + + if not isinstance(dataset, torchvision.datasets.CocoDetection): + raise TypeError( + f"This function expects dataset of type torchvision.datasets.CocoDetection, instead got {type(dataset)}" + ) + ids = [] + for ds_idx, img_id in enumerate(dataset.ids): + ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None) + anno = dataset.coco.loadAnns(ann_ids) + if cat_list: + anno = [obj for obj in anno if obj["category_id"] in cat_list] + if _has_valid_annotation(anno): + ids.append(ds_idx) + + dataset = torch.utils.data.Subset(dataset, ids) + return dataset diff --git a/main.py b/main.py index 054c6c9..a9e8b92 100644 --- a/main.py +++ b/main.py @@ -23,6 +23,10 @@ def write(self, message): self.stdout.write(message) self.file.write(message) + def flush(self): + self.stdout.flush() + self.file.flush() + def main(*, input_types, tasks, num_samples): # This is hardcoded when using a DataLoader with multiple workers: @@ -111,7 +115,11 @@ def main(*, input_types, tasks, num_samples): with contextlib.redirect_stdout(tee): main( - tasks=["classification-simple", "classification-complex"], + tasks=[ + "classification-simple", + "classification-complex", + "detection-ssdlite", + ], input_types=["Tensor", "PIL", "Datapoint"], num_samples=10_000, ) diff --git a/results/20230331121654.log b/results/20230331121654.log deleted file mode 100644 index 83f9c9e..0000000 --- a/results/20230331121654.log +++ /dev/null @@ -1,257 +0,0 @@ -############################################################ -classification-simple -############################################################ -input_type='Tensor', api_version='v1' - -Results computed for 10_000 samples - - median std -PILToTensor 110 µs +- 11 µs -RandomResizedCropWithoutResizeV1 54 µs +- 7 µs -Resize 645 µs +- 170 µs -RandomHorizontalFlip 21 µs +- 11 µs -ConvertImageDtype 48 µs +- 10 µs -Normalize 75 µs +- 10 µs - -total 953 µs ------------------------------------------------------------- -input_type='Tensor', api_version='v2' - -Results computed for 10_000 samples - - median std -PILToTensor 119 µs +- 9 µs -RandomResizedCropWithoutResizeV2 54 µs +- 13 µs -Resize 653 µs +- 214 µs -RandomHorizontalFlip 32 µs +- 13 µs -ConvertDtype 42 µs +- 4 µs -Normalize 62 µs +- 6 µs - -total 962 µs ------------------------------------------------------------- -input_type='PIL', api_version='v1' - -Results computed for 10_000 samples - - median std -RandomResizedCropWithoutResizeV1 73 µs +- 19 µs -Resize 564 µs +- 155 µs -RandomHorizontalFlip 25 µs +- 21 µs -PILToTensor 51 µs +- 5 µs -ConvertImageDtype 50 µs +- 5 µs -Normalize 438 µs +- 40 µs - -total 1202 µs ------------------------------------------------------------- -input_type='PIL', api_version='v2' - -Results computed for 10_000 samples - - median std -RandomResizedCropWithoutResizeV2 77 µs +- 13 µs -Resize 575 µs +- 159 µs -RandomHorizontalFlip 31 µs +- 24 µs -PILToTensor 60 µs +- 6 µs -ConvertDtype 44 µs +- 4 µs -Normalize 424 µs +- 43 µs - -total 1212 µs ------------------------------------------------------------- -input_type='Datapoint', api_version='v2' - -Results computed for 10_000 samples - - median std -ToImageTensor 122 µs +- 10 µs -RandomResizedCropWithoutResizeV2 59 µs +- 7 µs -Resize 647 µs +- 163 µs -RandomHorizontalFlip 38 µs +- 13 µs -ConvertDtype 46 µs +- 4 µs -Normalize 65 µs +- 6 µs - -total 978 µs ------------------------------------------------------------- - -Summaries - - v2 / v1 -Tensor 1.01 -PIL 1.01 - - x / PIL, v1 -Tensor, v1 0.79 
-Tensor, v2 0.80 -PIL, v1 1.00 -PIL, v2 1.01 -Datapoint, v2 0.81 -############################################################ -classification-complex -############################################################ -input_type='Tensor', api_version='v1' - -Results computed for 10_000 samples - - median std -PILToTensor 113 µs +- 9 µs -RandomResizedCropWithoutResizeV1 54 µs +- 6 µs -Resize 633 µs +- 165 µs -RandomHorizontalFlip 26 µs +- 8 µs -AutoAugment 782 µs +- 587 µs -RandomErasing 15 µs +- 35 µs -ConvertImageDtype 48 µs +- 5 µs -Normalize 75 µs +- 6 µs - -total 1745 µs ------------------------------------------------------------- -input_type='Tensor', api_version='v2' - -Results computed for 10_000 samples - - median std -PILToTensor 118 µs +- 9 µs -RandomResizedCropWithoutResizeV2 55 µs +- 7 µs -Resize 634 µs +- 158 µs -RandomHorizontalFlip 34 µs +- 11 µs -AutoAugment 624 µs +- 484 µs -RandomErasing 19 µs +- 37 µs -ConvertDtype 42 µs +- 3 µs -Normalize 62 µs +- 6 µs - -total 1588 µs ------------------------------------------------------------- -input_type='PIL', api_version='v1' - -Results computed for 10_000 samples - - median std -RandomResizedCropWithoutResizeV1 78 µs +- 15 µs -Resize 577 µs +- 160 µs -RandomHorizontalFlip 28 µs +- 22 µs -AutoAugment 334 µs +- 230 µs -PILToTensor 56 µs +- 7 µs -RandomErasing 15 µs +- 35 µs -ConvertImageDtype 50 µs +- 8 µs -Normalize 444 µs +- 46 µs - -total 1582 µs ------------------------------------------------------------- -input_type='PIL', api_version='v2' - -Results computed for 10_000 samples - - median std -RandomResizedCropWithoutResizeV2 77 µs +- 13 µs -Resize 569 µs +- 154 µs -RandomHorizontalFlip 24 µs +- 24 µs -AutoAugment 278 µs +- 232 µs -PILToTensor 62 µs +- 6 µs -RandomErasing 17 µs +- 36 µs -ConvertDtype 43 µs +- 6 µs -Normalize 418 µs +- 38 µs - -total 1487 µs ------------------------------------------------------------- -input_type='Datapoint', api_version='v2' - -Results computed for 10_000 samples - - median std -ToImageTensor 124 µs +- 9 µs -RandomResizedCropWithoutResizeV2 60 µs +- 7 µs -Resize 633 µs +- 160 µs -RandomHorizontalFlip 39 µs +- 13 µs -AutoAugment 622 µs +- 414 µs -RandomErasing 19 µs +- 40 µs -ConvertDtype 47 µs +- 4 µs -Normalize 65 µs +- 7 µs - -total 1609 µs ------------------------------------------------------------- - -Summaries - - v2 / v1 -Tensor 0.91 -PIL 0.94 - - x / PIL, v1 -Tensor, v1 1.10 -Tensor, v2 1.00 -PIL, v1 1.00 -PIL, v2 0.94 -Datapoint, v2 1.02 -############################################################ -Collecting environment information... 
-PyTorch version: 2.1.0.dev20230326+cpu -Is debug build: False -CUDA used to build PyTorch: Could not collect -ROCM used to build PyTorch: N/A - -OS: Arch Linux (x86_64) -GCC version: (GCC) 12.2.1 20230201 -Clang version: 15.0.7 -CMake version: version 3.25.3 -Libc version: glibc-2.37 - -Python version: 3.8.16 (default, Mar 2 2023, 03:21:46) [GCC 11.2.0] (64-bit runtime) -Python platform: Linux-6.2.6-arch1-1-x86_64-with-glibc2.17 -Is CUDA available: False -CUDA runtime version: 11.7.99 -CUDA_MODULE_LOADING set to: N/A -GPU models and configuration: GPU 0: NVIDIA GeForce GTX 1080 -Nvidia driver version: 525.89.02 -cuDNN version: Could not collect -HIP runtime version: N/A -MIOpen runtime version: N/A -Is XNNPACK available: True - -CPU: -Architecture: x86_64 -CPU op-mode(s): 32-bit, 64-bit -Address sizes: 48 bits physical, 48 bits virtual -Byte Order: Little Endian -CPU(s): 24 -On-line CPU(s) list: 0-23 -Vendor ID: AuthenticAMD -Model name: AMD Ryzen 9 5900X 12-Core Processor -CPU family: 25 -Model: 33 -Thread(s) per core: 2 -Core(s) per socket: 12 -Socket(s): 1 -Stepping: 0 -Frequency boost: enabled -CPU(s) scaling MHz: 50% -CPU max MHz: 4950,1948 -CPU min MHz: 2200,0000 -BogoMIPS: 7389,95 -Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr rdpru wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip pku ospke vaes vpclmulqdq rdpid overflow_recov succor smca fsrm -Virtualization: AMD-V -L1d cache: 384 KiB (12 instances) -L1i cache: 384 KiB (12 instances) -L2 cache: 6 MiB (12 instances) -L3 cache: 64 MiB (2 instances) -NUMA node(s): 1 -NUMA node0 CPU(s): 0-23 -Vulnerability Itlb multihit: Not affected -Vulnerability L1tf: Not affected -Vulnerability Mds: Not affected -Vulnerability Meltdown: Not affected -Vulnerability Mmio stale data: Not affected -Vulnerability Retbleed: Not affected -Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl -Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization -Vulnerability Spectre v2: Mitigation; Retpolines, IBPB conditional, IBRS_FW, STIBP always-on, RSB filling, PBRSB-eIBRS Not affected -Vulnerability Srbds: Not affected -Vulnerability Tsx async abort: Not affected - -Versions of relevant libraries: -[pip3] light-the-torch==0.7.2 -[pip3] mypy-extensions==1.0.0 -[pip3] numpy==1.24.1 -[pip3] torch==2.1.0.dev20230326+cpu -[pip3] torchvision==0.16.0.dev20230326+cpu -[conda] light-the-torch 0.7.2 pypi_0 pypi -[conda] numpy 1.24.1 pypi_0 pypi -[conda] torch 2.1.0.dev20230326+cpu pypi_0 pypi -[conda] torchvision 0.16.0.dev20230326+cpu pypi_0 pypi diff --git a/results/20230403073901.log b/results/20230403073901.log new file mode 100644 index 0000000..354de98 --- /dev/null +++ b/results/20230403073901.log @@ -0,0 +1,363 
@@ +############################################################ +classification-simple +############################################################ +input_type='Tensor', api_version='v1' + +Results computed for 10_000 samples + + median std +PILToTensor 106 µs +- 8 µs +RandomResizedCropWithoutResizeV1 50 µs +- 7 µs +Resize 619 µs +- 157 µs +RandomHorizontalFlip 24 µs +- 9 µs +ConvertImageDtype 46 µs +- 7 µs +Normalize 73 µs +- 8 µs + +total 918 µs +------------------------------------------------------------ +input_type='Tensor', api_version='v2' + +Results computed for 10_000 samples + + median std +PILToTensor 118 µs +- 10 µs +RandomResizedCropWithoutResizeV2 54 µs +- 7 µs +Resize 647 µs +- 168 µs +RandomHorizontalFlip 34 µs +- 11 µs +ConvertDtype 43 µs +- 4 µs +Normalize 63 µs +- 7 µs + +total 959 µs +------------------------------------------------------------ +input_type='PIL', api_version='v1' + +Results computed for 10_000 samples + + median std +RandomResizedCropWithoutResizeV1 76 µs +- 12 µs +Resize 583 µs +- 160 µs +RandomHorizontalFlip 52 µs +- 23 µs +PILToTensor 53 µs +- 5 µs +ConvertImageDtype 52 µs +- 6 µs +Normalize 451 µs +- 44 µs + +total 1265 µs +------------------------------------------------------------ +input_type='PIL', api_version='v2' + +Results computed for 10_000 samples + + median std +RandomResizedCropWithoutResizeV2 77 µs +- 11 µs +Resize 578 µs +- 159 µs +RandomHorizontalFlip 59 µs +- 24 µs +PILToTensor 60 µs +- 4 µs +ConvertDtype 45 µs +- 3 µs +Normalize 430 µs +- 36 µs + +total 1248 µs +------------------------------------------------------------ +input_type='Datapoint', api_version='v2' + +Results computed for 10_000 samples + + median std +ToImageTensor 121 µs +- 8 µs +RandomResizedCropWithoutResizeV2 60 µs +- 7 µs +Resize 651 µs +- 163 µs +RandomHorizontalFlip 38 µs +- 13 µs +ConvertDtype 47 µs +- 4 µs +Normalize 66 µs +- 7 µs + +total 983 µs +------------------------------------------------------------ + +Summaries + + v2 / v1 +Tensor 1.04 +PIL 0.99 + + x / PIL, v1 +Tensor, v1 0.73 +Tensor, v2 0.76 +PIL, v1 1.00 +PIL, v2 0.99 +Datapoint, v2 0.78 +############################################################ +classification-complex +############################################################ +input_type='Tensor', api_version='v1' + +Results computed for 10_000 samples + + median std +PILToTensor 114 µs +- 8 µs +RandomResizedCropWithoutResizeV1 55 µs +- 6 µs +Resize 649 µs +- 165 µs +RandomHorizontalFlip 27 µs +- 9 µs +AutoAugment 803 µs +- 565 µs +RandomErasing 15 µs +- 36 µs +ConvertImageDtype 50 µs +- 4 µs +Normalize 79 µs +- 5 µs + +total 1793 µs +------------------------------------------------------------ +input_type='Tensor', api_version='v2' + +Results computed for 10_000 samples + + median std +PILToTensor 120 µs +- 10 µs +RandomResizedCropWithoutResizeV2 56 µs +- 7 µs +Resize 655 µs +- 164 µs +RandomHorizontalFlip 28 µs +- 11 µs +AutoAugment 637 µs +- 467 µs +RandomErasing 19 µs +- 39 µs +ConvertDtype 44 µs +- 4 µs +Normalize 64 µs +- 6 µs + +total 1624 µs +------------------------------------------------------------ +input_type='PIL', api_version='v1' + +Results computed for 10_000 samples + + median std +RandomResizedCropWithoutResizeV1 80 µs +- 16 µs +Resize 568 µs +- 157 µs +RandomHorizontalFlip 24 µs +- 22 µs +AutoAugment 332 µs +- 228 µs +PILToTensor 55 µs +- 7 µs +RandomErasing 15 µs +- 34 µs +ConvertImageDtype 52 µs +- 13 µs +Normalize 441 µs +- 39 µs + +total 1566 µs +------------------------------------------------------------ 
+input_type='PIL', api_version='v2' + +Results computed for 10_000 samples + + median std +RandomResizedCropWithoutResizeV2 80 µs +- 12 µs +Resize 588 µs +- 159 µs +RandomHorizontalFlip 60 µs +- 25 µs +AutoAugment 288 µs +- 238 µs +PILToTensor 65 µs +- 6 µs +RandomErasing 19 µs +- 38 µs +ConvertDtype 46 µs +- 4 µs +Normalize 433 µs +- 38 µs + +total 1579 µs +------------------------------------------------------------ +input_type='Datapoint', api_version='v2' + +Results computed for 10_000 samples + + median std +ToImageTensor 126 µs +- 14 µs +RandomResizedCropWithoutResizeV2 63 µs +- 8 µs +Resize 641 µs +- 163 µs +RandomHorizontalFlip 40 µs +- 14 µs +AutoAugment 626 µs +- 414 µs +RandomErasing 20 µs +- 41 µs +ConvertDtype 48 µs +- 4 µs +Normalize 68 µs +- 10 µs + +total 1633 µs +------------------------------------------------------------ + +Summaries + + v2 / v1 +Tensor 0.91 +PIL 1.01 + + x / PIL, v1 +Tensor, v1 1.14 +Tensor, v2 1.04 +PIL, v1 1.00 +PIL, v2 1.01 +Datapoint, v2 1.04 +############################################################ +detection-ssdlite +############################################################ +loading annotations into memory... +Done (t=9.03s) +creating index... +index created! +Caching 10000 COCO samples +input_type='Tensor', api_version='v1' + +Results computed for 10_000 samples + + median std +DetectionReferenceConvertCocoPolysToMaskV1 2876 µs +- 4445 µs +DetectionReferencePILToTensorV1 269 µs +- 71 µs +DetectionReferenceRandomIoUCropV1 453 µs +- 7086 µs +DetectionReferenceRandomHorizontalFlipV1 29 µs +- 243 µs +DetectionReferenceConvertImageDtypeV1 293 µs +- 184 µs + +total 3921 µs +------------------------------------------------------------ +loading annotations into memory... +Done (t=12.46s) +creating index... +index created! +Caching 10000 COCO samples +input_type='Tensor', api_version='v2' + +Results computed for 10_000 samples + + median std +WrapCocoDetectionReferenceSampleForTransformsV2 1610 µs +- 2546 µs +PILToTensor 757 µs +- 18357 µs +RandomIoUCrop 1829 µs +- 20137 µs +RandomHorizontalFlip 554 µs +- 19808 µs +ConvertDtype 771 µs +- 18949 µs +SanitizeBoundingBox 1001 µs +- 16996 µs + +total 6521 µs +------------------------------------------------------------ +loading annotations into memory... +Done (t=14.08s) +creating index... +index created! +Caching 10000 COCO samples +input_type='PIL', api_version='v1' + +Results computed for 10_000 samples + + median std +DetectionReferenceConvertCocoPolysToMaskV1 3006 µs +- 4571 µs +DetectionReferenceRandomIoUCropV1 604 µs +- 7082 µs +DetectionReferenceRandomHorizontalFlipV1 132 µs +- 255 µs +DetectionReferencePILToTensorV1 193 µs +- 133 µs +DetectionReferenceConvertImageDtypeV1 332 µs +- 170 µs + +total 4268 µs +------------------------------------------------------------ +loading annotations into memory... +Done (t=10.88s) +creating index... +index created! +Caching 10000 COCO samples +input_type='PIL', api_version='v2' + +Results computed for 10_000 samples + + median std +WrapCocoDetectionReferenceSampleForTransformsV2 1677 µs +- 2587 µs +RandomIoUCrop 1807 µs +- 21534 µs +RandomHorizontalFlip 579 µs +- 21147 µs +PILToTensor 692 µs +- 16187 µs +ConvertDtype 792 µs +- 19596 µs +SanitizeBoundingBox 1016 µs +- 18356 µs + +total 6562 µs +------------------------------------------------------------ +loading annotations into memory... +Done (t=13.01s) +creating index... +index created! 
+Caching 10000 COCO samples +input_type='Datapoint', api_version='v2' + +Results computed for 10_000 samples + + median std +WrapCocoDetectionReferenceSampleForTransformsV2 1699 µs +- 2608 µs +ToImageTensor 883 µs +- 19872 µs +RandomIoUCrop 1614 µs +- 22483 µs +RandomHorizontalFlip 548 µs +- 16108 µs +ConvertDtype 748 µs +- 20230 µs +SanitizeBoundingBox 1009 µs +- 20313 µs + +total 6500 µs +------------------------------------------------------------ + +Summaries + + v2 / v1 +Tensor 1.66 +PIL 1.54 + + x / PIL, v1 +Tensor, v1 0.92 +Tensor, v2 1.53 +PIL, v1 1.00 +PIL, v2 1.54 +Datapoint, v2 1.52 +############################################################ +Collecting environment information... +PyTorch version: 2.1.0.dev20230326+cpu +Is debug build: False +CUDA used to build PyTorch: Could not collect +ROCM used to build PyTorch: N/A + +OS: Arch Linux (x86_64) +GCC version: (GCC) 12.2.1 20230201 +Clang version: 15.0.7 +CMake version: version 3.25.3 +Libc version: glibc-2.37 + +Python version: 3.8.16 (default, Mar 2 2023, 03:21:46) [GCC 11.2.0] (64-bit runtime) +Python platform: Linux-6.2.6-arch1-1-x86_64-with-glibc2.17 +Is CUDA available: False +CUDA runtime version: 11.7.99 +CUDA_MODULE_LOADING set to: N/A +GPU models and configuration: GPU 0: NVIDIA GeForce GTX 1080 +Nvidia driver version: 525.89.02 +cuDNN version: Could not collect +HIP runtime version: N/A +MIOpen runtime version: N/A +Is XNNPACK available: True + +CPU: +Architecture: x86_64 +CPU op-mode(s): 32-bit, 64-bit +Address sizes: 48 bits physical, 48 bits virtual +Byte Order: Little Endian +CPU(s): 24 +On-line CPU(s) list: 0-23 +Vendor ID: AuthenticAMD +Model name: AMD Ryzen 9 5900X 12-Core Processor +CPU family: 25 +Model: 33 +Thread(s) per core: 2 +Core(s) per socket: 12 +Socket(s): 1 +Stepping: 0 +Frequency boost: enabled +CPU(s) scaling MHz: 52% +CPU max MHz: 4950,1948 +CPU min MHz: 2200,0000 +BogoMIPS: 7389,03 +Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr rdpru wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip pku ospke vaes vpclmulqdq rdpid overflow_recov succor smca fsrm +Virtualization: AMD-V +L1d cache: 384 KiB (12 instances) +L1i cache: 384 KiB (12 instances) +L2 cache: 6 MiB (12 instances) +L3 cache: 64 MiB (2 instances) +NUMA node(s): 1 +NUMA node0 CPU(s): 0-23 +Vulnerability Itlb multihit: Not affected +Vulnerability L1tf: Not affected +Vulnerability Mds: Not affected +Vulnerability Meltdown: Not affected +Vulnerability Mmio stale data: Not affected +Vulnerability Retbleed: Not affected +Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl +Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization +Vulnerability Spectre v2: Mitigation; Retpolines, IBPB conditional, 
IBRS_FW, STIBP always-on, RSB filling, PBRSB-eIBRS Not affected +Vulnerability Srbds: Not affected +Vulnerability Tsx async abort: Not affected + +Versions of relevant libraries: +[pip3] light-the-torch==0.7.2 +[pip3] mypy-extensions==1.0.0 +[pip3] numpy==1.24.1 +[pip3] torch==2.1.0.dev20230326+cpu +[pip3] torchvision==0.16.0.dev20230326+cpu +[conda] Could not collect diff --git a/tasks.py b/tasks.py index 14d46c5..fe0c62e 100644 --- a/tasks.py +++ b/tasks.py @@ -1,7 +1,8 @@ -from datasets import classification_dataset_builder +from datasets import classification_dataset_builder, detection_dataset_builder from transforms import ( - classification_simple_pipeline_builder, classification_complex_pipeline_builder, + classification_simple_pipeline_builder, + detection_ssdlite_pipeline_builder, ) TASKS = { @@ -13,6 +14,10 @@ classification_complex_pipeline_builder, classification_dataset_builder, ), + "detection-ssdlite": ( + detection_ssdlite_pipeline_builder, + detection_dataset_builder, + ), } @@ -24,7 +29,6 @@ def make_task(name, *, input_type, api_version, dataset_rng, num_samples): return None dataset = dataset_builder( - input_type=input_type, api_version=api_version, rng=dataset_rng, num_samples=num_samples, diff --git a/transforms.py b/transforms.py index 0efa863..7828557 100644 --- a/transforms.py +++ b/transforms.py @@ -1,12 +1,19 @@ +import functools from time import perf_counter_ns - -import torch +from types import SimpleNamespace import torchvision.transforms.v2 as transforms_v2 -from torchvision import transforms as transforms_v1 +from torchvision import datasets, transforms as transforms_v1 +from torchvision.datapoints._dataset_wrapper import WRAPPER_FACTORIES from torchvision.transforms import functional as F_v1 from torchvision.transforms.v2 import functional as F_v2 +__all__ = [ + "classification_simple_pipeline_builder", + "classification_complex_pipeline_builder", + "detection_ssdlite_pipeline_builder", +] + class Pipeline: def __init__(self, transforms): @@ -126,6 +133,59 @@ def classification_complex_pipeline_builder(*, input_type, api_version): return Pipeline(pipeline) +def detection_ssdlite_pipeline_builder(*, input_type, api_version): + if input_type == "Datapoint" and api_version == "v1": + return None + + pipeline = [] + if api_version == "v1": + pipeline.append(DetectionReferenceConvertCocoPolysToMaskV1()) + + if input_type == "Tensor": + pipeline.append(DetectionReferencePILToTensorV1()) + + pipeline.extend( + [ + DetectionReferenceRandomIoUCropV1(), + DetectionReferenceRandomHorizontalFlipV1(p=0.5), + ] + ) + + if input_type == "PIL": + pipeline.append(DetectionReferencePILToTensorV1()) + + pipeline.append(DetectionReferenceConvertImageDtypeV1(torch.float)) + + elif api_version == "v2": + pipeline.append(WrapCocoDetectionReferenceSampleForTransformsV2()) + + if input_type == "Tensor": + pipeline.append(transforms_v2.PILToTensor()) + elif input_type == "Datapoint": + pipeline.append(transforms_v2.ToImageTensor()) + + pipeline.extend( + [ + transforms_v2.RandomIoUCrop(), + transforms_v2.RandomHorizontalFlip(p=0.5), + ] + ) + + if input_type == "PIL": + pipeline.append(transforms_v2.PILToTensor()) + + pipeline.extend( + [ + transforms_v2.ConvertDtype(torch.float), + transforms_v2.SanitizeBoundingBox(), + ] + ) + else: + raise ValueError(f"Got {api_version=}") + + return Pipeline(pipeline) + + class RandomResizedCropWithoutResizeV1(transforms_v1.RandomResizedCrop): def forward(self, img): i, j, h, w = self.get_params(img, self.scale, self.ratio) @@ -135,3 +195,238 
@@ def forward(self, img):
 class RandomResizedCropWithoutResizeV2(transforms_v2.RandomResizedCrop):
     def _transform(self, inpt, params):
         return F_v2.crop(inpt, **params)
+
+
+class WrapCocoDetectionReferenceSampleForTransformsV2:
+    def __init__(self):
+        num_samples = 117_266
+        wrapper_factory = WRAPPER_FACTORIES[datasets.CocoDetection]
+        mock_dataset = SimpleNamespace(ids=list(range(num_samples)))
+        wrapper = wrapper_factory(mock_dataset)
+        self.wrapper = functools.partial(wrapper, num_samples // 2)
+
+    def __call__(self, *inputs):
+        return self.wrapper(inputs if len(inputs) > 1 else inputs[0])
+
+
+# everything below is copy-pasted from
+# https://github.com/pytorch/vision/blob/main/references/detection/coco_utils.py
+
+from typing import Dict, List, Optional, Tuple
+
+import torch
+import torchvision
+from pycocotools import mask as coco_mask
+from torch import nn, Tensor
+
+
+def convert_coco_poly_to_mask(segmentations, height, width):
+    masks = []
+    for polygons in segmentations:
+        rles = coco_mask.frPyObjects(polygons, height, width)
+        mask = coco_mask.decode(rles)
+        if len(mask.shape) < 3:
+            mask = mask[..., None]
+        mask = torch.as_tensor(mask, dtype=torch.uint8)
+        mask = mask.any(dim=2)
+        masks.append(mask)
+    if masks:
+        masks = torch.stack(masks, dim=0)
+    else:
+        masks = torch.zeros((0, height, width), dtype=torch.uint8)
+    return masks
+
+
+class DetectionReferenceConvertCocoPolysToMaskV1:
+    def __call__(self, image, target):
+        w, h = image.size
+
+        image_id = target["image_id"]
+        image_id = torch.tensor([image_id])
+
+        anno = target["annotations"]
+
+        anno = [obj for obj in anno if obj["iscrowd"] == 0]
+
+        boxes = [obj["bbox"] for obj in anno]
+        # guard against no boxes via resizing
+        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
+        boxes[:, 2:] += boxes[:, :2]
+        boxes[:, 0::2].clamp_(min=0, max=w)
+        boxes[:, 1::2].clamp_(min=0, max=h)
+
+        classes = [obj["category_id"] for obj in anno]
+        classes = torch.tensor(classes, dtype=torch.int64)
+
+        segmentations = [obj["segmentation"] for obj in anno]
+        masks = convert_coco_poly_to_mask(segmentations, h, w)
+
+        keypoints = None
+        if anno and "keypoints" in anno[0]:
+            keypoints = [obj["keypoints"] for obj in anno]
+            keypoints = torch.as_tensor(keypoints, dtype=torch.float32)
+            num_keypoints = keypoints.shape[0]
+            if num_keypoints:
+                keypoints = keypoints.view(num_keypoints, -1, 3)
+
+        keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])
+        boxes = boxes[keep]
+        classes = classes[keep]
+        masks = masks[keep]
+        if keypoints is not None:
+            keypoints = keypoints[keep]
+
+        target = {}
+        target["boxes"] = boxes
+        target["labels"] = classes
+        target["masks"] = masks
+        target["image_id"] = image_id
+        if keypoints is not None:
+            target["keypoints"] = keypoints
+
+        # for conversion to coco api
+        area = torch.tensor([obj["area"] for obj in anno])
+        iscrowd = torch.tensor([obj["iscrowd"] for obj in anno])
+        target["area"] = area
+        target["iscrowd"] = iscrowd
+
+        return image, target
+
+
+class DetectionReferenceRandomHorizontalFlipV1(transforms_v1.RandomHorizontalFlip):
+    def forward(
+        self, image: Tensor, target: Optional[Dict[str, Tensor]] = None
+    ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
+        if torch.rand(1) < self.p:
+            image = F_v1.hflip(image)
+            if target is not None:
+                _, _, width = F_v1.get_dimensions(image)
+                target["boxes"][:, [0, 2]] = width - target["boxes"][:, [2, 0]]
+                if "masks" in target:
+                    target["masks"] = target["masks"].flip(-1)
+                if "keypoints" in target:
+                    keypoints = 
target["keypoints"] + keypoints = _flip_coco_person_keypoints(keypoints, width) + target["keypoints"] = keypoints + return image, target + + +class DetectionReferencePILToTensorV1(nn.Module): + def forward( + self, image: Tensor, target: Optional[Dict[str, Tensor]] = None + ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: + image = F_v1.pil_to_tensor(image) + return image, target + + +class DetectionReferenceConvertImageDtypeV1(nn.Module): + def __init__(self, dtype: torch.dtype) -> None: + super().__init__() + self.dtype = dtype + + def forward( + self, image: Tensor, target: Optional[Dict[str, Tensor]] = None + ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: + image = F_v1.convert_image_dtype(image, self.dtype) + return image, target + + +class DetectionReferenceRandomIoUCropV1(nn.Module): + def __init__( + self, + min_scale: float = 0.3, + max_scale: float = 1.0, + min_aspect_ratio: float = 0.5, + max_aspect_ratio: float = 2.0, + sampler_options: Optional[List[float]] = None, + trials: int = 40, + ): + super().__init__() + # Configuration similar to https://github.com/weiliu89/caffe/blob/ssd/examples/ssd/ssd_coco.py#L89-L174 + self.min_scale = min_scale + self.max_scale = max_scale + self.min_aspect_ratio = min_aspect_ratio + self.max_aspect_ratio = max_aspect_ratio + if sampler_options is None: + sampler_options = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0] + self.options = sampler_options + self.trials = trials + + def forward( + self, image: Tensor, target: Optional[Dict[str, Tensor]] = None + ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: + if target is None: + raise ValueError("The targets can't be None for this transform.") + + if isinstance(image, torch.Tensor): + if image.ndimension() not in {2, 3}: + raise ValueError( + f"image should be 2/3 dimensional. Got {image.ndimension()} dimensions." 
+ ) + elif image.ndimension() == 2: + image = image.unsqueeze(0) + + _, orig_h, orig_w = F_v1.get_dimensions(image) + + while True: + # sample an option + idx = int(torch.randint(low=0, high=len(self.options), size=(1,))) + min_jaccard_overlap = self.options[idx] + if ( + min_jaccard_overlap >= 1.0 + ): # a value larger than 1 encodes the leave as-is option + return image, target + + for _ in range(self.trials): + # check the aspect ratio limitations + r = self.min_scale + (self.max_scale - self.min_scale) * torch.rand(2) + new_w = int(orig_w * r[0]) + new_h = int(orig_h * r[1]) + aspect_ratio = new_w / new_h + if not (self.min_aspect_ratio <= aspect_ratio <= self.max_aspect_ratio): + continue + + # check for 0 area crops + r = torch.rand(2) + left = int((orig_w - new_w) * r[0]) + top = int((orig_h - new_h) * r[1]) + right = left + new_w + bottom = top + new_h + if left == right or top == bottom: + continue + + # check for any valid boxes with centers within the crop area + cx = 0.5 * (target["boxes"][:, 0] + target["boxes"][:, 2]) + cy = 0.5 * (target["boxes"][:, 1] + target["boxes"][:, 3]) + is_within_crop_area = ( + (left < cx) & (cx < right) & (top < cy) & (cy < bottom) + ) + if not is_within_crop_area.any(): + continue + + # check at least 1 box with jaccard limitations + boxes = target["boxes"][is_within_crop_area] + ious = torchvision.ops.boxes.box_iou( + boxes, + torch.tensor( + [[left, top, right, bottom]], + dtype=boxes.dtype, + device=boxes.device, + ), + ) + if ious.max() < min_jaccard_overlap: + continue + + # keep only valid boxes and perform cropping + target["boxes"] = boxes + target["labels"] = target["labels"][is_within_crop_area] + target["boxes"][:, 0::2] -= left + target["boxes"][:, 1::2] -= top + target["boxes"][:, 0::2].clamp_(min=0, max=new_w) + target["boxes"][:, 1::2].clamp_(min=0, max=new_h) + image = F_v1.crop(image, top, left, new_h, new_w) + + return image, target From c80dc73fbc16502df645f5335d316d22ecb8c937 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 3 Apr 2023 14:37:05 +0200 Subject: [PATCH 2/4] improve summary --- main.py | 18 +- results/20230403073901.log | 363 ------------------------------------- 2 files changed, 12 insertions(+), 369 deletions(-) delete mode 100644 results/20230403073901.log diff --git a/main.py b/main.py index a9e8b92..48fb6d2 100644 --- a/main.py +++ b/main.py @@ -1,6 +1,6 @@ import contextlib -import itertools import pathlib +import string import sys from datetime import datetime @@ -98,16 +98,22 @@ def main(*, input_types, tasks, num_samples): print() - median_ref = medians["PIL"]["v1"] medians_flat = { f"{input_type}, {api_version}": median for input_type, api_versions in medians.items() for api_version, median in api_versions.items() } field_len = max(len(label) for label in medians_flat) - print(f"{' ' * field_len} x / PIL, v1") - for label, median in medians_flat.items(): - print(f"{label:{field_len}} {median / median_ref:>11.2f}") + + print( + f"{' ' * (field_len + 5)} {' '.join(f' [{id}]' for _, id in zip(range(len(medians_flat)), string.ascii_lowercase))}" + ) + for (label, val), id in zip(medians_flat.items(), string.ascii_lowercase): + print( + f"{label:>{field_len}}, [{id}] {' '.join(f'{val / ref:4.2f}' for ref in medians_flat.values())}" + ) + print() + print("Slowdown as row / col") if __name__ == "__main__": @@ -121,7 +127,7 @@ def main(*, input_types, tasks, num_samples): "detection-ssdlite", ], input_types=["Tensor", "PIL", "Datapoint"], - num_samples=10_000, + num_samples=1_000, ) print("#" * 60) diff 
--git a/results/20230403073901.log b/results/20230403073901.log deleted file mode 100644 index 354de98..0000000 --- a/results/20230403073901.log +++ /dev/null @@ -1,363 +0,0 @@ -############################################################ -classification-simple -############################################################ -input_type='Tensor', api_version='v1' - -Results computed for 10_000 samples - - median std -PILToTensor 106 µs +- 8 µs -RandomResizedCropWithoutResizeV1 50 µs +- 7 µs -Resize 619 µs +- 157 µs -RandomHorizontalFlip 24 µs +- 9 µs -ConvertImageDtype 46 µs +- 7 µs -Normalize 73 µs +- 8 µs - -total 918 µs ------------------------------------------------------------- -input_type='Tensor', api_version='v2' - -Results computed for 10_000 samples - - median std -PILToTensor 118 µs +- 10 µs -RandomResizedCropWithoutResizeV2 54 µs +- 7 µs -Resize 647 µs +- 168 µs -RandomHorizontalFlip 34 µs +- 11 µs -ConvertDtype 43 µs +- 4 µs -Normalize 63 µs +- 7 µs - -total 959 µs ------------------------------------------------------------- -input_type='PIL', api_version='v1' - -Results computed for 10_000 samples - - median std -RandomResizedCropWithoutResizeV1 76 µs +- 12 µs -Resize 583 µs +- 160 µs -RandomHorizontalFlip 52 µs +- 23 µs -PILToTensor 53 µs +- 5 µs -ConvertImageDtype 52 µs +- 6 µs -Normalize 451 µs +- 44 µs - -total 1265 µs ------------------------------------------------------------- -input_type='PIL', api_version='v2' - -Results computed for 10_000 samples - - median std -RandomResizedCropWithoutResizeV2 77 µs +- 11 µs -Resize 578 µs +- 159 µs -RandomHorizontalFlip 59 µs +- 24 µs -PILToTensor 60 µs +- 4 µs -ConvertDtype 45 µs +- 3 µs -Normalize 430 µs +- 36 µs - -total 1248 µs ------------------------------------------------------------- -input_type='Datapoint', api_version='v2' - -Results computed for 10_000 samples - - median std -ToImageTensor 121 µs +- 8 µs -RandomResizedCropWithoutResizeV2 60 µs +- 7 µs -Resize 651 µs +- 163 µs -RandomHorizontalFlip 38 µs +- 13 µs -ConvertDtype 47 µs +- 4 µs -Normalize 66 µs +- 7 µs - -total 983 µs ------------------------------------------------------------- - -Summaries - - v2 / v1 -Tensor 1.04 -PIL 0.99 - - x / PIL, v1 -Tensor, v1 0.73 -Tensor, v2 0.76 -PIL, v1 1.00 -PIL, v2 0.99 -Datapoint, v2 0.78 -############################################################ -classification-complex -############################################################ -input_type='Tensor', api_version='v1' - -Results computed for 10_000 samples - - median std -PILToTensor 114 µs +- 8 µs -RandomResizedCropWithoutResizeV1 55 µs +- 6 µs -Resize 649 µs +- 165 µs -RandomHorizontalFlip 27 µs +- 9 µs -AutoAugment 803 µs +- 565 µs -RandomErasing 15 µs +- 36 µs -ConvertImageDtype 50 µs +- 4 µs -Normalize 79 µs +- 5 µs - -total 1793 µs ------------------------------------------------------------- -input_type='Tensor', api_version='v2' - -Results computed for 10_000 samples - - median std -PILToTensor 120 µs +- 10 µs -RandomResizedCropWithoutResizeV2 56 µs +- 7 µs -Resize 655 µs +- 164 µs -RandomHorizontalFlip 28 µs +- 11 µs -AutoAugment 637 µs +- 467 µs -RandomErasing 19 µs +- 39 µs -ConvertDtype 44 µs +- 4 µs -Normalize 64 µs +- 6 µs - -total 1624 µs ------------------------------------------------------------- -input_type='PIL', api_version='v1' - -Results computed for 10_000 samples - - median std -RandomResizedCropWithoutResizeV1 80 µs +- 16 µs -Resize 568 µs +- 157 µs -RandomHorizontalFlip 24 µs +- 22 µs -AutoAugment 332 µs +- 228 µs -PILToTensor 55 µs +- 7 µs 
-RandomErasing 15 µs +- 34 µs -ConvertImageDtype 52 µs +- 13 µs -Normalize 441 µs +- 39 µs - -total 1566 µs ------------------------------------------------------------- -input_type='PIL', api_version='v2' - -Results computed for 10_000 samples - - median std -RandomResizedCropWithoutResizeV2 80 µs +- 12 µs -Resize 588 µs +- 159 µs -RandomHorizontalFlip 60 µs +- 25 µs -AutoAugment 288 µs +- 238 µs -PILToTensor 65 µs +- 6 µs -RandomErasing 19 µs +- 38 µs -ConvertDtype 46 µs +- 4 µs -Normalize 433 µs +- 38 µs - -total 1579 µs ------------------------------------------------------------- -input_type='Datapoint', api_version='v2' - -Results computed for 10_000 samples - - median std -ToImageTensor 126 µs +- 14 µs -RandomResizedCropWithoutResizeV2 63 µs +- 8 µs -Resize 641 µs +- 163 µs -RandomHorizontalFlip 40 µs +- 14 µs -AutoAugment 626 µs +- 414 µs -RandomErasing 20 µs +- 41 µs -ConvertDtype 48 µs +- 4 µs -Normalize 68 µs +- 10 µs - -total 1633 µs ------------------------------------------------------------- - -Summaries - - v2 / v1 -Tensor 0.91 -PIL 1.01 - - x / PIL, v1 -Tensor, v1 1.14 -Tensor, v2 1.04 -PIL, v1 1.00 -PIL, v2 1.01 -Datapoint, v2 1.04 -############################################################ -detection-ssdlite -############################################################ -loading annotations into memory... -Done (t=9.03s) -creating index... -index created! -Caching 10000 COCO samples -input_type='Tensor', api_version='v1' - -Results computed for 10_000 samples - - median std -DetectionReferenceConvertCocoPolysToMaskV1 2876 µs +- 4445 µs -DetectionReferencePILToTensorV1 269 µs +- 71 µs -DetectionReferenceRandomIoUCropV1 453 µs +- 7086 µs -DetectionReferenceRandomHorizontalFlipV1 29 µs +- 243 µs -DetectionReferenceConvertImageDtypeV1 293 µs +- 184 µs - -total 3921 µs ------------------------------------------------------------- -loading annotations into memory... -Done (t=12.46s) -creating index... -index created! -Caching 10000 COCO samples -input_type='Tensor', api_version='v2' - -Results computed for 10_000 samples - - median std -WrapCocoDetectionReferenceSampleForTransformsV2 1610 µs +- 2546 µs -PILToTensor 757 µs +- 18357 µs -RandomIoUCrop 1829 µs +- 20137 µs -RandomHorizontalFlip 554 µs +- 19808 µs -ConvertDtype 771 µs +- 18949 µs -SanitizeBoundingBox 1001 µs +- 16996 µs - -total 6521 µs ------------------------------------------------------------- -loading annotations into memory... -Done (t=14.08s) -creating index... -index created! -Caching 10000 COCO samples -input_type='PIL', api_version='v1' - -Results computed for 10_000 samples - - median std -DetectionReferenceConvertCocoPolysToMaskV1 3006 µs +- 4571 µs -DetectionReferenceRandomIoUCropV1 604 µs +- 7082 µs -DetectionReferenceRandomHorizontalFlipV1 132 µs +- 255 µs -DetectionReferencePILToTensorV1 193 µs +- 133 µs -DetectionReferenceConvertImageDtypeV1 332 µs +- 170 µs - -total 4268 µs ------------------------------------------------------------- -loading annotations into memory... -Done (t=10.88s) -creating index... -index created! -Caching 10000 COCO samples -input_type='PIL', api_version='v2' - -Results computed for 10_000 samples - - median std -WrapCocoDetectionReferenceSampleForTransformsV2 1677 µs +- 2587 µs -RandomIoUCrop 1807 µs +- 21534 µs -RandomHorizontalFlip 579 µs +- 21147 µs -PILToTensor 692 µs +- 16187 µs -ConvertDtype 792 µs +- 19596 µs -SanitizeBoundingBox 1016 µs +- 18356 µs - -total 6562 µs ------------------------------------------------------------- -loading annotations into memory... 
-Done (t=13.01s) -creating index... -index created! -Caching 10000 COCO samples -input_type='Datapoint', api_version='v2' - -Results computed for 10_000 samples - - median std -WrapCocoDetectionReferenceSampleForTransformsV2 1699 µs +- 2608 µs -ToImageTensor 883 µs +- 19872 µs -RandomIoUCrop 1614 µs +- 22483 µs -RandomHorizontalFlip 548 µs +- 16108 µs -ConvertDtype 748 µs +- 20230 µs -SanitizeBoundingBox 1009 µs +- 20313 µs - -total 6500 µs ------------------------------------------------------------- - -Summaries - - v2 / v1 -Tensor 1.66 -PIL 1.54 - - x / PIL, v1 -Tensor, v1 0.92 -Tensor, v2 1.53 -PIL, v1 1.00 -PIL, v2 1.54 -Datapoint, v2 1.52 -############################################################ -Collecting environment information... -PyTorch version: 2.1.0.dev20230326+cpu -Is debug build: False -CUDA used to build PyTorch: Could not collect -ROCM used to build PyTorch: N/A - -OS: Arch Linux (x86_64) -GCC version: (GCC) 12.2.1 20230201 -Clang version: 15.0.7 -CMake version: version 3.25.3 -Libc version: glibc-2.37 - -Python version: 3.8.16 (default, Mar 2 2023, 03:21:46) [GCC 11.2.0] (64-bit runtime) -Python platform: Linux-6.2.6-arch1-1-x86_64-with-glibc2.17 -Is CUDA available: False -CUDA runtime version: 11.7.99 -CUDA_MODULE_LOADING set to: N/A -GPU models and configuration: GPU 0: NVIDIA GeForce GTX 1080 -Nvidia driver version: 525.89.02 -cuDNN version: Could not collect -HIP runtime version: N/A -MIOpen runtime version: N/A -Is XNNPACK available: True - -CPU: -Architecture: x86_64 -CPU op-mode(s): 32-bit, 64-bit -Address sizes: 48 bits physical, 48 bits virtual -Byte Order: Little Endian -CPU(s): 24 -On-line CPU(s) list: 0-23 -Vendor ID: AuthenticAMD -Model name: AMD Ryzen 9 5900X 12-Core Processor -CPU family: 25 -Model: 33 -Thread(s) per core: 2 -Core(s) per socket: 12 -Socket(s): 1 -Stepping: 0 -Frequency boost: enabled -CPU(s) scaling MHz: 52% -CPU max MHz: 4950,1948 -CPU min MHz: 2200,0000 -BogoMIPS: 7389,03 -Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr rdpru wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip pku ospke vaes vpclmulqdq rdpid overflow_recov succor smca fsrm -Virtualization: AMD-V -L1d cache: 384 KiB (12 instances) -L1i cache: 384 KiB (12 instances) -L2 cache: 6 MiB (12 instances) -L3 cache: 64 MiB (2 instances) -NUMA node(s): 1 -NUMA node0 CPU(s): 0-23 -Vulnerability Itlb multihit: Not affected -Vulnerability L1tf: Not affected -Vulnerability Mds: Not affected -Vulnerability Meltdown: Not affected -Vulnerability Mmio stale data: Not affected -Vulnerability Retbleed: Not affected -Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl -Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization -Vulnerability 
Spectre v2: Mitigation; Retpolines, IBPB conditional, IBRS_FW, STIBP always-on, RSB filling, PBRSB-eIBRS Not affected -Vulnerability Srbds: Not affected -Vulnerability Tsx async abort: Not affected - -Versions of relevant libraries: -[pip3] light-the-torch==0.7.2 -[pip3] mypy-extensions==1.0.0 -[pip3] numpy==1.24.1 -[pip3] torch==2.1.0.dev20230326+cpu -[pip3] torchvision==0.16.0.dev20230326+cpu -[conda] Could not collect From a859c0931005aeb9a30d0b4496c8b0d422e23c6b Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 3 Apr 2023 14:47:17 +0200 Subject: [PATCH 3/4] drop DetectionReference prefix --- transforms.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/transforms.py b/transforms.py index 7828557..0b1ae97 100644 --- a/transforms.py +++ b/transforms.py @@ -139,25 +139,25 @@ def detection_ssdlite_pipeline_builder(*, input_type, api_version): pipeline = [] if api_version == "v1": - pipeline.append(DetectionReferenceConvertCocoPolysToMaskV1()) + pipeline.append(ConvertCocoPolysToMaskV1()) if input_type == "Tensor": - pipeline.append(DetectionReferencePILToTensorV1()) + pipeline.append(PILToTensorV1()) pipeline.extend( [ - DetectionReferenceRandomIoUCropV1(), - DetectionReferenceRandomHorizontalFlipV1(p=0.5), + RandomIoUCropV1(), + RandomHorizontalFlipV1(p=0.5), ] ) if input_type == "PIL": - pipeline.append(DetectionReferencePILToTensorV1()) + pipeline.append(PILToTensorV1()) - pipeline.append(DetectionReferenceConvertImageDtypeV1(torch.float)) + pipeline.append(ConvertImageDtypeV1(torch.float)) elif api_version == "v2": - pipeline.append(WrapCocoDetectionReferenceSampleForTransformsV2()) + pipeline.append(WrapCocoSampleForTransformsV2()) if input_type == "Tensor": pipeline.append(transforms_v2.PILToTensor()) @@ -197,7 +197,7 @@ def _transform(self, inpt, params): return F_v2.crop(inpt, **params) -class WrapCocoDetectionReferenceSampleForTransformsV2: +class WrapCocoSampleForTransformsV2: def __init__(self): num_samples = 117_266 wrapper_factory = WRAPPER_FACTORIES[datasets.CocoDetection] @@ -237,7 +237,7 @@ def convert_coco_poly_to_mask(segmentations, height, width): return masks -class DetectionReferenceConvertCocoPolysToMaskV1: +class ConvertCocoPolysToMaskV1: def __call__(self, image, target): w, h = image.size @@ -296,7 +296,7 @@ def __call__(self, image, target): return image, target -class DetectionReferenceRandomHorizontalFlipV1(transforms_v1.RandomHorizontalFlip): +class RandomHorizontalFlipV1(transforms_v1.RandomHorizontalFlip): def forward( self, image: Tensor, target: Optional[Dict[str, Tensor]] = None ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: @@ -314,7 +314,7 @@ def forward( return image, target -class DetectionReferencePILToTensorV1(nn.Module): +class PILToTensorV1(nn.Module): def forward( self, image: Tensor, target: Optional[Dict[str, Tensor]] = None ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: @@ -322,7 +322,7 @@ def forward( return image, target -class DetectionReferenceConvertImageDtypeV1(nn.Module): +class ConvertImageDtypeV1(nn.Module): def __init__(self, dtype: torch.dtype) -> None: super().__init__() self.dtype = dtype @@ -334,7 +334,7 @@ def forward( return image, target -class DetectionReferenceRandomIoUCropV1(nn.Module): +class RandomIoUCropV1(nn.Module): def __init__( self, min_scale: float = 0.3, From 05350be4215589059d41c656fccbec0068ebf023 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Tue, 4 Apr 2023 11:27:19 +0200 Subject: [PATCH 4/4] cleanup --- datasets.py | 6 +- main.py | 129 +++++++------- 
results/20230404093341.log | 339 +++++++++++++++++++++++++++++++++++++ transforms.py | 15 +- 4 files changed, 417 insertions(+), 72 deletions(-) create mode 100644 results/20230404093341.log diff --git a/datasets.py b/datasets.py index b4e3a7c..704a1e5 100644 --- a/datasets.py +++ b/datasets.py @@ -33,9 +33,9 @@ def detection_dataset_builder(*, api_version, rng, num_samples): dataset = _coco_remove_images_without_annotations(dataset) - idcs = torch.randperm(len(dataset), generator=rng)[:num_samples] - print(f"Caching {num_samples} COCO samples") - return [dataset[idx] for idx in tqdm(idcs.tolist())] + idcs = torch.randperm(len(dataset), generator=rng)[:num_samples].tolist() + print(f"Caching {num_samples} ({idcs[:3]} ... {idcs[-3:]}) COCO samples") + return [dataset[idx] for idx in tqdm(idcs)] # everything below is copy-pasted from diff --git a/main.py b/main.py index 48fb6d2..a883169 100644 --- a/main.py +++ b/main.py @@ -1,4 +1,5 @@ import contextlib +import itertools import pathlib import string import sys @@ -33,87 +34,87 @@ def main(*, input_types, tasks, num_samples): # https://github.com/pytorch/pytorch/blob/19162083f8831be87be01bb84f186310cad1d348/torch/utils/data/_utils/worker.py#L222 torch.set_num_threads(1) + dataset_rng = torch.Generator() + dataset_rng.manual_seed(0) + dataset_rng_state = dataset_rng.get_state() + for task_name in tasks: print("#" * 60) print(task_name) print("#" * 60) medians = {input_type: {} for input_type in input_types} - for input_type in input_types: - dataset_rng = torch.Generator() - dataset_rng.manual_seed(0) - dataset_rng_state = dataset_rng.get_state() - - for api_version in ["v1", "v2"]: - dataset_rng.set_state(dataset_rng_state) - task = make_task( - task_name, - input_type=input_type, - api_version=api_version, - dataset_rng=dataset_rng, - num_samples=num_samples, - ) - if task is None: - continue - - print(f"{input_type=}, {api_version=}") - print() - print(f"Results computed for {num_samples:_} samples") - print() - - pipeline, dataset = task - - for sample in dataset: - pipeline(sample) - - results = pipeline.extract_times() - field_len = max(len(name) for name in results) - print(f"{' ' * field_len} {'median ':>9} {'std ':>9}") - medians[input_type][api_version] = 0.0 - for transform_name, times in results.items(): - median = float(times.median()) - print( - f"{transform_name:{field_len}} {median * 1e6:6.0f} µs +- {float(times.std()) * 1e6:6.0f} µs" - ) - medians[input_type][api_version] += median + for input_type, api_version in itertools.product(input_types, ["v1", "v2"]): + dataset_rng.set_state(dataset_rng_state) + task = make_task( + task_name, + input_type=input_type, + api_version=api_version, + dataset_rng=dataset_rng, + num_samples=num_samples, + ) + if task is None: + continue - print( - f"\n{'total':{field_len}} {medians[input_type][api_version] * 1e6:6.0f} µs" - ) - print("-" * 60) + print(f"{input_type=}, {api_version=}") + print() + print(f"Results computed for {num_samples:_} samples") + print() - print() - print("Summaries") - print() + pipeline, dataset = task - field_len = max(len(input_type) for input_type in medians) - print(f"{' ' * field_len} v2 / v1") - for input_type, api_versions in medians.items(): - if len(api_versions) < 2: - continue + torch.manual_seed(0) + for sample in dataset: + pipeline(sample) + + results = pipeline.extract_times() + field_len = max(len(name) for name in results) + print(f"{' ' * field_len} {'median ':>9} {'std ':>9}") + medians[input_type][api_version] = 0.0 + for transform_name, times in 
results.items():
+            median = float(times.median())
+            print(
+                f"{transform_name:{field_len}} {median * 1e6:6.0f} µs +- {float(times.std()) * 1e6:6.0f} µs"
+            )
+            medians[input_type][api_version] += median
         print(
-            f"{input_type:{field_len}} {api_versions['v2'] / api_versions['v1']:>7.2f}"
+            f"\n{'total':{field_len}} {medians[input_type][api_version] * 1e6:6.0f} µs"
         )
+        print("-" * 60)
 
-    print()
+    print()
+    print("Summaries")
+    print()
 
-    medians_flat = {
-        f"{input_type}, {api_version}": median
-        for input_type, api_versions in medians.items()
-        for api_version, median in api_versions.items()
-    }
-    field_len = max(len(label) for label in medians_flat)
+    field_len = max(len(input_type) for input_type in medians)
+    print(f"{' ' * field_len} v2 / v1")
+    for input_type, api_versions in medians.items():
+        if len(api_versions) < 2:
+            continue
         print(
-            f"{' ' * (field_len + 5)} {' '.join(f' [{id}]' for _, id in zip(range(len(medians_flat)), string.ascii_lowercase))}"
+            f"{input_type:{field_len}} {api_versions['v2'] / api_versions['v1']:>7.2f}"
         )
-    for (label, val), id in zip(medians_flat.items(), string.ascii_lowercase):
-        print(
-            f"{label:>{field_len}}, [{id}] {' '.join(f'{val / ref:4.2f}' for ref in medians_flat.values())}"
-        )
-    print()
-    print("Slowdown as row / col")
+
+    print()
+
+    medians_flat = {
+        f"{input_type}, {api_version}": median
+        for input_type, api_versions in medians.items()
+        for api_version, median in api_versions.items()
+    }
+    field_len = max(len(label) for label in medians_flat)
+
+    print(
+        f"{' ' * (field_len + 5)} {' '.join(f' [{id}]' for _, id in zip(range(len(medians_flat)), string.ascii_lowercase))}"
+    )
+    for (label, val), id in zip(medians_flat.items(), string.ascii_lowercase):
+        print(
+            f"{label:>{field_len}}, [{id}] {' '.join(f'{val / ref:4.2f}' for ref in medians_flat.values())}"
+        )
+    print()
+    print("Slowdown as row / col")
 
 
 if __name__ == "__main__":
diff --git a/results/20230404093341.log b/results/20230404093341.log
new file mode 100644
index 0000000..fbf71c2
--- /dev/null
+++ b/results/20230404093341.log
@@ -0,0 +1,339 @@
+############################################################
+classification-simple
+############################################################
+input_type='Tensor', api_version='v1'
+
+Results computed for 1_000 samples
+
+                                  median    std
+PILToTensor                         110 µs +-     10 µs
+RandomResizedCropWithoutResizeV1     52 µs +-      9 µs
+Resize                              636 µs +-    184 µs
+RandomHorizontalFlip                 25 µs +-     10 µs
+ConvertImageDtype                    47 µs +-     11 µs
+Normalize                            75 µs +-     14 µs
+
+total                               945 µs
+------------------------------------------------------------
+input_type='Tensor', api_version='v2'
+
+Results computed for 1_000 samples
+
+                                  median    std
+PILToTensor                         116 µs +-      8 µs
+RandomResizedCropWithoutResizeV2     55 µs +-      6 µs
+Resize                              618 µs +-    159 µs
+RandomHorizontalFlip                 35 µs +-     10 µs
+ConvertDtype                         42 µs +-      3 µs
+Normalize                            61 µs +-      4 µs
+
+total                               926 µs
+------------------------------------------------------------
+input_type='PIL', api_version='v1'
+
+Results computed for 1_000 samples
+
+                                  median    std
+RandomResizedCropWithoutResizeV1     76 µs +-     11 µs
+Resize                              548 µs +-    152 µs
+RandomHorizontalFlip                 51 µs +-     22 µs
+PILToTensor                          52 µs +-      5 µs
+ConvertImageDtype                    50 µs +-      6 µs
+Normalize                           438 µs +-     36 µs
+
+total                              1214 µs
+------------------------------------------------------------
+input_type='PIL', api_version='v2'
+
+Results computed for 1_000 samples
+
+                                  median    std
+RandomResizedCropWithoutResizeV2     73 µs +-     10 µs
+Resize                              540 µs +-    150 µs
+RandomHorizontalFlip                 58 µs +-     23 µs
+PILToTensor                          57 µs +-      3 µs
+ConvertDtype                         43 µs +-      3 µs
+Normalize                           417 µs +-     33 µs
+
+total                              1189 µs
+------------------------------------------------------------
+input_type='Datapoint', api_version='v2'
+
+Results computed for 1_000 samples
+
+                                  median    std
+ToImageTensor                       122 µs +-      9 µs
+RandomResizedCropWithoutResizeV2     60 µs +-      7 µs
+Resize                              619 µs +-    163 µs
+RandomHorizontalFlip                 37 µs +-     12 µs
+ConvertDtype                         45 µs +-      6 µs
+Normalize                            64 µs +-      5 µs
+
+total                               948 µs
+------------------------------------------------------------
+############################################################
+classification-complex
+############################################################
+input_type='Tensor', api_version='v1'
+
+Results computed for 1_000 samples
+
+                                  median    std
+PILToTensor                         109 µs +-     12 µs
+RandomResizedCropWithoutResizeV1     53 µs +-      7 µs
+Resize                              630 µs +-    166 µs
+RandomHorizontalFlip                 18 µs +-      8 µs
+AutoAugment                         765 µs +-    623 µs
+RandomErasing                        14 µs +-     36 µs
+ConvertImageDtype                    48 µs +-      5 µs
+Normalize                            74 µs +-      6 µs
+
+total                              1711 µs
+------------------------------------------------------------
+input_type='Tensor', api_version='v2'
+
+Results computed for 1_000 samples
+
+                                  median    std
+PILToTensor                         116 µs +-     10 µs
+RandomResizedCropWithoutResizeV2     55 µs +-      7 µs
+Resize                              632 µs +-    166 µs
+RandomHorizontalFlip                 24 µs +-     10 µs
+AutoAugment                         611 µs +-    606 µs
+RandomErasing                        18 µs +-     36 µs
+ConvertDtype                         42 µs +-      3 µs
+Normalize                            62 µs +-      5 µs
+
+total                              1560 µs
+------------------------------------------------------------
+input_type='PIL', api_version='v1'
+
+Results computed for 1_000 samples
+
+                                  median    std
+RandomResizedCropWithoutResizeV1     80 µs +-     15 µs
+Resize                              587 µs +-    156 µs
+RandomHorizontalFlip                 22 µs +-     23 µs
+AutoAugment                         339 µs +-    234 µs
+PILToTensor                          57 µs +-      6 µs
+RandomErasing                        15 µs +-     36 µs
+ConvertImageDtype                    54 µs +-     16 µs
+Normalize                           459 µs +-     39 µs
+
+total                              1613 µs
+------------------------------------------------------------
+input_type='PIL', api_version='v2'
+
+Results computed for 1_000 samples
+
+                                  median    std
+RandomResizedCropWithoutResizeV2     78 µs +-     12 µs
+Resize                              577 µs +-    160 µs
+RandomHorizontalFlip                 27 µs +-     24 µs
+AutoAugment                         307 µs +-    242 µs
+PILToTensor                          64 µs +-      4 µs
+RandomErasing                        18 µs +-     36 µs
+ConvertDtype                         45 µs +-      6 µs
+Normalize                           427 µs +-     34 µs
+
+total                              1543 µs
+------------------------------------------------------------
+input_type='Datapoint', api_version='v2'
+
+Results computed for 1_000 samples
+
+                                  median    std
+ToImageTensor                       125 µs +-      9 µs
+RandomResizedCropWithoutResizeV2     61 µs +-      6 µs
+Resize                              646 µs +-    167 µs
+RandomHorizontalFlip                 22 µs +-     13 µs
+AutoAugment                         630 µs +-    381 µs
+RandomErasing                        18 µs +-     39 µs
+ConvertDtype                         48 µs +-      3 µs
+Normalize                            68 µs +-      6 µs
+
+total                              1617 µs
+------------------------------------------------------------
+############################################################
+detection-ssdlite
+############################################################
+loading annotations into memory...
+Done (t=9.71s)
+creating index...
+index created!
+Caching 1000 ([89444, 73295, 101719] ... [31395, 96727, 47807]) COCO samples
+input_type='Tensor', api_version='v1'
+
+Results computed for 1_000 samples
+
+                          median    std
+ConvertCocoPolysToMaskV1    2799 µs +-   4403 µs
+PILToTensorV1                268 µs +-     77 µs
+RandomIoUCropV1              467 µs +-   7166 µs
+RandomHorizontalFlipV1        18 µs +-    218 µs
+ConvertImageDtypeV1          267 µs +-    178 µs
+
+total                       3820 µs
+------------------------------------------------------------
+loading annotations into memory...
+Done (t=8.87s)
+creating index...
+index created!
+Caching 1000 ([89444, 73295, 101719] ... [31395, 96727, 47807]) COCO samples
+input_type='Tensor', api_version='v2'
+
+Results computed for 1_000 samples
+
+                               median    std
+WrapCocoSampleForTransformsV2    1487 µs +-   2446 µs
+PILToTensor                       748 µs +-   5328 µs
+RandomIoUCrop                    1835 µs +-   7046 µs
+RandomHorizontalFlip              559 µs +-   2322 µs
+ConvertDtype                      760 µs +-   5410 µs
+SanitizeBoundingBox              1004 µs +-   4817 µs
+
+total                            6394 µs
+------------------------------------------------------------
+loading annotations into memory...
+Done (t=9.84s)
+creating index...
+index created!
+Caching 1000 ([89444, 73295, 101719] ... [31395, 96727, 47807]) COCO samples
+input_type='PIL', api_version='v1'
+
+Results computed for 1_000 samples
+
+                          median    std
+ConvertCocoPolysToMaskV1    2816 µs +-   4427 µs
+RandomIoUCropV1              556 µs +-   7177 µs
+RandomHorizontalFlipV1        20 µs +-    212 µs
+PILToTensorV1                180 µs +-    112 µs
+ConvertImageDtypeV1          281 µs +-    168 µs
+
+total                       3851 µs
+------------------------------------------------------------
+loading annotations into memory...
+Done (t=9.73s)
+creating index...
+index created!
+Caching 1000 ([89444, 73295, 101719] ... [31395, 96727, 47807]) COCO samples
+input_type='PIL', api_version='v2'
+
+Results computed for 1_000 samples
+
+                               median    std
+WrapCocoSampleForTransformsV2    1536 µs +-   2480 µs
+RandomIoUCrop                    1809 µs +-   9065 µs
+RandomHorizontalFlip              582 µs +-   4570 µs
+PILToTensor                       653 µs +-   4991 µs
+ConvertDtype                      777 µs +-   5354 µs
+SanitizeBoundingBox              1012 µs +-   6233 µs
+
+total                            6369 µs
+------------------------------------------------------------
+loading annotations into memory...
+Done (t=9.91s)
+creating index...
+index created!
+Caching 1000 ([89444, 73295, 101719] ... [31395, 96727, 47807]) COCO samples
+input_type='Datapoint', api_version='v2'
+
+Results computed for 1_000 samples
+
+                               median    std
+WrapCocoSampleForTransformsV2    1537 µs +-   2505 µs
+ToImageTensor                     833 µs +-   2973 µs
+RandomIoUCrop                    1717 µs +-   8842 µs
+RandomHorizontalFlip              547 µs +-   5286 µs
+ConvertDtype                      725 µs +-   6290 µs
+SanitizeBoundingBox              1021 µs +-   5869 µs
+
+total                            6380 µs
+------------------------------------------------------------
+
+Summaries
+
+          v2 / v1
+Tensor       1.67
+PIL          1.65
+
+                    [a]  [b]  [c]  [d]  [e]
+   Tensor, v1, [a] 1.00 0.60 0.99 0.60 0.60
+   Tensor, v2, [b] 1.67 1.00 1.66 1.00 1.00
+      PIL, v1, [c] 1.01 0.60 1.00 0.60 0.60
+      PIL, v2, [d] 1.67 1.00 1.65 1.00 1.00
+Datapoint, v2, [e] 1.67 1.00 1.66 1.00 1.00
+
+Slowdown as row / col
+############################################################
+Collecting environment information...
+PyTorch version: 2.1.0.dev20230403+cpu
+Is debug build: False
+CUDA used to build PyTorch: Could not collect
+ROCM used to build PyTorch: N/A
+
+OS: Arch Linux (x86_64)
+GCC version: (GCC) 12.2.1 20230201
+Clang version: 15.0.7
+CMake version: version 3.25.3
+Libc version: glibc-2.37
+
+Python version: 3.8.16 (default, Mar 2 2023, 03:21:46) [GCC 11.2.0] (64-bit runtime)
+Python platform: Linux-6.2.6-arch1-1-x86_64-with-glibc2.17
+Is CUDA available: False
+CUDA runtime version: 11.7.99
+CUDA_MODULE_LOADING set to: N/A
+GPU models and configuration: GPU 0: NVIDIA GeForce GTX 1080
+Nvidia driver version: 525.89.02
+cuDNN version: Could not collect
+HIP runtime version: N/A
+MIOpen runtime version: N/A
+Is XNNPACK available: True
+
+CPU:
+Architecture:                    x86_64
+CPU op-mode(s):                  32-bit, 64-bit
+Address sizes:                   48 bits physical, 48 bits virtual
+Byte Order:                      Little Endian
+CPU(s):                          24
+On-line CPU(s) list:             0-23
+Vendor ID:                       AuthenticAMD
+Model name:                      AMD Ryzen 9 5900X 12-Core Processor
+CPU family:                      25
+Model:                           33
+Thread(s) per core:              2
+Core(s) per socket:              12
+Socket(s):                       1
+Stepping:                        0
+Frequency boost:                 enabled
+CPU(s) scaling MHz:              56%
+CPU max MHz:                     4950,1948
+CPU min MHz:                     2200,0000
+BogoMIPS:                        7388,29
+Flags:                           fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr rdpru wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip pku ospke vaes vpclmulqdq rdpid overflow_recov succor smca fsrm
+Virtualization:                  AMD-V
+L1d cache:                       384 KiB (12 instances)
+L1i cache:                       384 KiB (12 instances)
+L2 cache:                        6 MiB (12 instances)
+L3 cache:                        64 MiB (2 instances)
+NUMA node(s):                    1
+NUMA node0 CPU(s):               0-23
+Vulnerability Itlb multihit:     Not affected
+Vulnerability L1tf:              Not affected
+Vulnerability Mds:               Not affected
+Vulnerability Meltdown:          Not affected
+Vulnerability Mmio stale data:   Not affected
+Vulnerability Retbleed:          Not affected
+Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl
+Vulnerability Spectre v1:        Mitigation; usercopy/swapgs barriers and __user pointer sanitization
+Vulnerability Spectre v2:        Mitigation; Retpolines, IBPB conditional, IBRS_FW, STIBP always-on, RSB filling, PBRSB-eIBRS Not affected
+Vulnerability Srbds:             Not affected
+Vulnerability Tsx async abort:   Not affected
+
+Versions of relevant libraries:
+[pip3] light-the-torch==0.7.2
+[pip3] mypy-extensions==1.0.0
+[pip3] numpy==1.24.1
+[pip3] torch==2.1.0.dev20230403+cpu
+[pip3] torchvision==0.16.0.dev20230403+cpu
+[conda] Could not collect
diff --git a/transforms.py b/transforms.py
index 0b1ae97..c337d13 100644
--- a/transforms.py
+++ b/transforms.py
@@ -199,14 +199,19 @@ def _transform(self, inpt, params):
 
 
 class WrapCocoSampleForTransformsV2:
     def __init__(self):
-        num_samples = 117_266
         wrapper_factory = WRAPPER_FACTORIES[datasets.CocoDetection]
-        mock_dataset = SimpleNamespace(ids=list(range(num_samples)))
+        # The v2 wrapper depends on the `.ids` attribute of a `CocoDetection`
+        # dataset. However, that attribute is lost above when images without
+        # annotations are filtered out. Thus, we fake it here.
+        mock_dataset = SimpleNamespace(ids=["invalid"])
         wrapper = wrapper_factory(mock_dataset)
-        self.wrapper = functools.partial(wrapper, num_samples // 2)
+        # The wrapper gets passed the index alongside the sample to wrap. The
+        # former is only used to look up the image ID via the `.ids` attribute,
+        # so any index for which `.ids[idx]` resolves will do; we use 0.
+        self.wrapper = functools.partial(wrapper, 0)
 
-    def __call__(self, *inputs):
-        return self.wrapper(inputs if len(inputs) > 1 else inputs[0])
+    def __call__(self, image, target):
+        return self.wrapper((image, target))
 
 
 # everything below is copy-pasted from
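
For comparison with the mock above: outside this benchmark harness, the same
wrapping is normally obtained from a real `CocoDetection` dataset, in which
case `.ids` exists and nothing needs to be faked. A minimal sketch, assuming a
torchvision of this era (>= 0.15) that exports `wrap_dataset_for_transforms_v2`
from `torchvision.datasets`; the COCO paths below are placeholders.

# Minimal sketch, not part of the patch. Assumes torchvision >= 0.15;
# the dataset paths are placeholders.
from torchvision import datasets
from torchvision.datasets import wrap_dataset_for_transforms_v2

coco = datasets.CocoDetection(
    "path/to/train2017",
    "path/to/annotations/instances_train2017.json",
)
# Wrapping a real dataset: `.ids` exists here, which is exactly what the
# mocked SimpleNamespace stands in for in the benchmark.
coco = wrap_dataset_for_transforms_v2(coco)

# Samples come back as (image, target); the target values (e.g. bounding
# boxes) are datapoints that the v2 transforms can dispatch on.
image, target = coco[0]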