
Commit 758701c

Merge branch 'master' into dynas/transformer

Signed-off-by: Xinyu Ye <[email protected]>
Conflicts: test/nas/test_nas.py

2 parents: 858d7b0 + 30803cf

File tree: 98 files changed, +12826 −1276 lines changed


examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/README.md

Lines changed: 1 addition & 1 deletion
@@ -173,7 +173,7 @@ After prepare step is done, we add tune and benchmark code to generate quantized
 #### Benchmark
 ```python
 from neural_compressor.experimental import Benchmark, common
-from neural_compressor.model.model import get_model_type
+from neural_compressor.model.tensorflow_model import get_model_type
 evaluator = Benchmark(FLAGS.config)
 dataset = Dataset(eval_file, FLAGS.eval_batch_size)
 evaluator.b_dataloader = common.DataLoader(\
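
The only change here is the import path: `get_model_type` moved from `neural_compressor.model.model` to `neural_compressor.model.tensorflow_model`. For scripts that must run against both pre- and post-refactor releases, a small compatibility shim (a sketch, not part of this commit) would be:

```python
# Sketch, not part of the commit: prefer the new module path for
# get_model_type, falling back to the old one on pre-refactor releases.
try:
    from neural_compressor.model.tensorflow_model import get_model_type
except ImportError:
    from neural_compressor.model.model import get_model_type  # older releases
```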

examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/run_classifier.py

Lines changed: 1 addition & 1 deletion
@@ -1109,7 +1109,7 @@ def result(self):
   evaluator.metric = Accuracy()
 
 
-  from neural_compressor.model.model import get_model_type
+  from neural_compressor.model.tensorflow_model import get_model_type
   model_type = get_model_type(FLAGS.input_model)
   if model_type == 'frozen_pb':
     evaluator.model = FLAGS.input_model

neural_compressor/adaptor/mxnet_utils/util.py

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@
 from enum import Enum
 from tempfile import TemporaryDirectory
 from neural_compressor.utils.utility import LazyImport
-from neural_compressor.model.model import MXNetModel as NCModel
+from neural_compressor.model.mxnet_model import MXNetModel as NCModel
 
 mx = LazyImport("mxnet")
 
neural_compressor/adaptor/tensorflow.py

Lines changed: 4 additions & 3 deletions
@@ -136,7 +136,7 @@ def train(self, model, dataloader, optimizer_tuple,
               criterion_tuple, hooks, postprocess, **kwargs):
         # check model is savedmodel or not
         import tensorflow as tf
-        from neural_compressor.model.model import get_model_type
+        from neural_compressor.model.tensorflow_model import get_model_type
         tf.random.set_seed(1)
         self.model_type = get_model_type(model._model)
         optimizer = optimizer_tuple[0](**optimizer_tuple[1])
@@ -1204,7 +1204,7 @@ def inspect_tensor(self, model, dataloader=None, op_list=[], iteration_list=[],
             ]
         }
         """
-        from neural_compressor.model.model import TensorflowBaseModel
+        from neural_compressor.model.tensorflow_model import TensorflowBaseModel
         from neural_compressor.utils.utility import load_data_from_pkl, dump_data_to_local
         from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer
         from .tf_utils.util import int8_node_name_reverse
@@ -1586,7 +1586,8 @@ def _get_mse_order(self, fp32_model, tune_cfg, replace_cfgs, ops_lst, dataloader
 
     def _partial_dataset_of(self, dataloader, confidence_batches):
         from neural_compressor.experimental.data.datasets.dummy_dataset import DummyDataset
-        if isinstance(dataloader.dataset, DummyDataset):
+        from neural_compressor.data.datasets.dummy_dataset import DummyDataset as DummyDataset_v2_x
+        if isinstance(dataloader.dataset, DummyDataset) or isinstance(dataloader.dataset, DummyDataset_v2_x):
             assert(isinstance(confidence_batches, int))
             ds = copy.deepcopy(dataloader.dataset)
             ds.dataset = ds.dataset[:confidence_batches]
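
An aside on the new condition (an observation, not part of the commit): `isinstance` accepts a tuple of types, so the two checks added here could be collapsed into one. A minimal sketch with a hypothetical helper name:

```python
# Hypothetical helper illustrating the tuple form of isinstance; the import
# paths are the ones used in the hunk above.
from neural_compressor.experimental.data.datasets.dummy_dataset import DummyDataset
from neural_compressor.data.datasets.dummy_dataset import DummyDataset as DummyDataset_v2_x

def _is_dummy_dataset(dataset):
    """True for both the legacy (experimental) and 2.x dummy datasets."""
    return isinstance(dataset, (DummyDataset, DummyDataset_v2_x))
```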

neural_compressor/data/__init__.py

Lines changed: 9 additions & 6 deletions
@@ -14,26 +14,29 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# ==============================================================================
+"""Built-in dataloaders, datasets, transforms, filters for multiple framework backends."""
 
 
-from .dataloaders import DataLoader
 import neural_compressor.data.datasets
 import neural_compressor.data.transforms
-from ..experimental.data.datasets import DATASETS, Dataset, IterableDataset, dataset_registry
-from ..experimental.data.transforms import TRANSFORMS, BaseTransform, transform_registry
-from ..experimental.data.dataloaders import DATALOADERS
-from ..experimental.data.filters import FILTERS, Filter, filter_registry
+from .datasets import Datasets, Dataset, IterableDataset, dataset_registry
+from .dataloaders import DATALOADERS, DataLoader
+from .transforms import TRANSFORMS, BaseTransform, transform_registry, Postprocess
+
+from .filters import FILTERS, Filter, filter_registry
 
 __all__ = [
     "DataLoader",
     "DATALOADERS",
-    "DATASETS",
+    "Datasets",
     "Dataset",
     "IterableDataset",
     "dataset_registry",
     "TRANSFORMS",
     "BaseTransform",
     "transform_registry",
+    "Postprocess",
     "FILTERS",
     "Filter",
     "filter_registry",]

neural_compressor/data/dataloaders/__init__.py

Lines changed: 4 additions & 2 deletions
@@ -14,9 +14,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# ==============================================================================
 
-from .dataloader import DataLoader
+from .dataloader import DataLoader, DATALOADERS
 
 __all__ = [
     "DataLoader",
-]
+    "DATALOADERS"
+]
neural_compressor/data/dataloaders/base_dataloader.py

Lines changed: 119 additions & 0 deletions
@@ -0,0 +1,119 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2021 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""BaseDataLoader of all dataloaders."""

from abc import abstractmethod


class BaseDataLoader:  # pragma: no cover
    """Base class for all DataLoaders.

    _generate_dataloader is needed to create a dataloader object
    from the general params like batch_size and sampler. The dynamic batching is just to
    generate a new dataloader by setting batch_size and last_batch.
    """

    def __init__(self, dataset, batch_size=1, last_batch='rollover', collate_fn=None,
                 sampler=None, batch_sampler=None, num_workers=0, pin_memory=False,
                 shuffle=False, distributed=False):
        """Initialize BaseDataLoader.

        Args:
            dataset (object): dataset from which to load the data
            batch_size (int, optional): number of samples per batch. Defaults to 1.
            last_batch (str, optional): whether to drop the last batch if it is incomplete.
                Support ['rollover', 'discard'], rollover means False, discard means True.
                Defaults to 'rollover'.
            collate_fn (callable, optional): merge data with outer dimension batch size. Defaults to None.
            sampler (Sampler, optional): Sampler object to sample data. Defaults to None.
            batch_sampler (BatchSampler, optional): BatchSampler object to generate batch of indices. Defaults to None.
            num_workers (int, optional): number of subprocesses to use for data loading. Defaults to 0.
            pin_memory (bool, optional): whether to copy data into pinned memory before returning. Defaults to False.
            shuffle (bool, optional): whether to shuffle data. Defaults to False.
            distributed (bool, optional): whether the dataloader is distributed. Defaults to False.
        """
        self.dataset = dataset
        self.collate_fn = collate_fn
        self.sampler = sampler
        self.batch_sampler = batch_sampler
        self.num_workers = num_workers
        self.pin_memory = pin_memory
        self._batch_size = batch_size
        self.shuffle = shuffle
        self.distributed = distributed
        self.last_batch = last_batch
        self.drop_last = False if last_batch == 'rollover' else True

        self.dataloader = self._generate_dataloader(
            self.dataset,
            batch_size=batch_size,
            last_batch=last_batch,
            collate_fn=collate_fn,
            sampler=sampler,
            batch_sampler=batch_sampler,
            num_workers=num_workers,
            pin_memory=pin_memory,
            shuffle=shuffle,
            distributed=distributed)

    def batch(self, batch_size, last_batch=None):
        """Set batch size for dataloader.

        Args:
            batch_size (int): number of samples per batch.
            last_batch (str, optional): whether to drop the last batch if it is incomplete.
                Support ['rollover', 'discard'], rollover means False, discard means True.
                Defaults to None.
        """
        self._batch_size = batch_size
        if last_batch is not None:
            self.last_batch = last_batch
        self.dataloader = self._generate_dataloader(
            self.dataset,
            batch_size,
            self.last_batch,
            self.collate_fn,
            self.sampler,
            self.batch_sampler,
            self.num_workers,
            self.pin_memory,
            self.shuffle,
            self.distributed)

    @property
    def batch_size(self):
        """Get dataloader's batch_size.

        Returns:
            int: batch_size
        """
        return self._batch_size

    def __iter__(self):
        """Yield data in iterative order.

        Returns:
            iterator: iterator for dataloader
        """
        return iter(self.dataloader)

    @abstractmethod
    def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, sampler,
                             batch_sampler, num_workers, pin_memory, shuffle, distributed):
        raise NotImplementedError
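
`BaseDataLoader` stores the construction parameters and defers the actual loader creation to the abstract `_generate_dataloader`, which both `__init__` and `batch()` call; `__iter__` simply delegates to whatever that hook returned. A toy subclass (hypothetical, for illustration only; it ignores samplers, workers, and distribution) shows the contract:

```python
# Hypothetical subclass of the BaseDataLoader above: the only obligation is
# to turn the stored settings into something iterable.
class ListDataLoader(BaseDataLoader):
    def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn,
                             sampler, batch_sampler, num_workers, pin_memory,
                             shuffle, distributed):
        # Slice the dataset into consecutive batches.
        batches = [dataset[i:i + batch_size] for i in range(0, len(dataset), batch_size)]
        # 'discard' drops a trailing short batch; 'rollover' keeps it.
        if last_batch == 'discard' and batches and len(batches[-1]) < batch_size:
            batches.pop()
        return batches

loader = ListDataLoader(list(range(10)), batch_size=4, last_batch='discard')
print(list(loader))  # [[0, 1, 2, 3], [4, 5, 6, 7]]
```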

neural_compressor/data/dataloaders/dataloader.py

Lines changed: 2 additions & 1 deletion
@@ -15,13 +15,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+"""Built-in dataloaders for multiple framework backends."""
+
 from neural_compressor.experimental.data.dataloaders import DATALOADERS
 
 # THIS API IS TO BE DEPRECATED!
 class DataLoader(object):
     """Entrance of all configured DataLoaders. Will dispatch the DataLoaders to framework
     specific one. Users will be not aware of the dispatching, and the Interface is unified.
-
     """
 
     def __new__(cls, framework, dataset, batch_size=1, collate_fn=None,
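
This `DataLoader` is a dispatching constructor: `__new__` takes a framework name plus generic loader arguments and returns the framework-specific implementation, which is why the class is flagged for deprecation in favor of the direct `DATALOADERS` registry. A hedged usage sketch (the `'dummy'` dataset is the same assumption as in the earlier sketch):

```python
# Sketch of the dispatching entrance; the framework string selects the
# concrete loader, so callers never touch framework-specific classes.
from neural_compressor.data import Datasets, DataLoader

dataset = Datasets('tensorflow')['dummy'](shape=(10, 224, 224, 3))
dataloader = DataLoader(framework='tensorflow', dataset=dataset, batch_size=2)
```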
neural_compressor/data/dataloaders/default_dataloader.py

Lines changed: 143 additions & 0 deletions
@@ -0,0 +1,143 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2021 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Default dataloader for multiple framework backends."""

import collections
import numpy as np
from math import ceil, floor
from abc import abstractmethod
from .sampler import IterableSampler, SequentialSampler, BatchSampler
from .fetcher import FETCHERS
from .base_dataloader import BaseDataLoader


def default_collate(batch):  # pragma: no cover
    """Merge data with outer dimension batch size."""
    elem = batch[0]
    if isinstance(elem, collections.abc.Mapping):
        return {key: default_collate([d[key] for d in batch]) for key in elem}
    elif isinstance(elem, collections.abc.Sequence):
        batch = zip(*batch)
        return [default_collate(samples) for samples in batch]
    elif isinstance(elem, np.ndarray):
        try:
            return np.stack(batch)
        except:
            return batch
    else:
        return batch


class DefaultDataLoader(BaseDataLoader):  # pragma: no cover
    """DefaultDataLoader for multiple framework backends."""

    def __init__(self, dataset, batch_size=1, last_batch='rollover', collate_fn=None,
                 sampler=None, batch_sampler=None, num_workers=0, pin_memory=False,
                 shuffle=False, distributed=False):
        """Initialize DefaultDataLoader.

        Args:
            dataset (object): dataset from which to load the data
            batch_size (int, optional): number of samples per batch. Defaults to 1.
            last_batch (str, optional): whether to drop the last batch if it is incomplete.
                Support ['rollover', 'discard'], rollover means False, discard means True.
                Defaults to 'rollover'.
            collate_fn (callable, optional): merge data with outer dimension batch size. Defaults to None.
            sampler (Sampler, optional): Sampler object to sample data. Defaults to None.
            batch_sampler (BatchSampler, optional): BatchSampler object to generate batch of indices. Defaults to None.
            num_workers (int, optional): number of subprocesses to use for data loading. Defaults to 0.
            pin_memory (bool, optional): whether to copy data into pinned memory before returning. Defaults to False.
            shuffle (bool, optional): whether to shuffle data. Defaults to False.
            distributed (bool, optional): whether the dataloader is distributed. Defaults to False.
        """
        self.dataset = dataset
        self.last_batch = last_batch
        self.sampler = sampler
        self.batch_sampler = batch_sampler
        self.num_workers = num_workers
        self.pin_memory = pin_memory
        self.collate_fn = collate_fn
        self._batch_size = batch_size
        self.shuffle = shuffle
        self.distributed = distributed
        self.drop_last = False if last_batch == 'rollover' else True
        if self.collate_fn == None:
            self.collate_fn = default_collate

    def batch(self, batch_size, last_batch='rollover'):
        """Set batch_size and last_batch."""
        self._batch_size = batch_size
        self.last_batch = last_batch

    @property
    def dataloader(self):
        """Return dataloader."""
        return self

    def __iter__(self):
        """Yield data in iterative order."""
        return self._generate_dataloader(
            self.dataset,
            batch_size=self.batch_size,
            last_batch=self.last_batch,
            collate_fn=self.collate_fn,
            sampler=self.sampler,
            batch_sampler=self.batch_sampler,
            num_workers=self.num_workers,
            pin_memory=self.pin_memory,
            shuffle=self.shuffle,
            distributed=self.distributed)

    def __len__(self):
        """Get dataset length."""
        try:
            dataset_len = self.dataset.__len__()
        except (AttributeError, TypeError):
            dataset_len = 0
            for _ in self.dataset:
                dataset_len += 1
        except Exception:
            raise ValueError(f"{self.dataset} is invalid, {self.dataset}" \
                " does not support calculating the length of its dataloader")
        if self.drop_last == False:
            dataloader_len = ceil(dataset_len / self.batch_size)
        else:
            dataloader_len = floor(dataset_len / self.batch_size)
        return dataloader_len

    def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, sampler,
                             batch_sampler, num_workers, pin_memory, shuffle, distributed):
        sampler = self._generate_sampler(dataset, distributed)
        self.batch_sampler = BatchSampler(sampler, batch_size, self.drop_last)
        self.fetcher = FETCHERS[self.dataset_type](dataset, collate_fn, self.drop_last, distributed)

        for batched_indices in self.batch_sampler:
            try:
                data = self.fetcher(batched_indices)
                yield data
            except StopIteration:
                return

    def _generate_sampler(self, dataset, distributed):
        if hasattr(dataset, "__getitem__"):
            self.dataset_type = 'index'
            return SequentialSampler(dataset, distributed)
        elif hasattr(dataset, "__iter__"):
            self.dataset_type = 'iter'
            return IterableSampler(dataset)
        else:
            raise ValueError("dataset type only support (index, iter)")
