Skip to content

Commit 30567b4

Browse files
committed
Move files to _models
1 parent daae64d commit 30567b4

File tree

17 files changed

+34
-34
lines changed

17 files changed

+34
-34
lines changed

benchmarks/_models/llama/eval.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from tokenizer import get_tokenizer
1616

1717
import torchao
18-
from benchmarks._models.llama.model import prepare_inputs_for_model
18+
from torchao._models.model import prepare_inputs_for_model
1919
from torchao.quantization import (
2020
PerRow,
2121
PerTensor,
@@ -120,7 +120,7 @@ def run_evaluation(
120120
quantize_(model, int4_weight_only(layout=MarlinSparseLayout()))
121121
if "int4wo" in quantization and "gptq" in quantization:
122122
# avoid circular imports
123-
from benchmarks._models._eval import MultiTensorInputRecorder
123+
from torchao._models._eval import MultiTensorInputRecorder
124124
from torchao.quantization.GPTQ_MT import Int4WeightOnlyGPTQQuantizer
125125

126126
groupsize = int(quantization.split("-")[-2])
@@ -172,7 +172,7 @@ def run_evaluation(
172172
if "autoround" in quantization:
173173
from transformers import AutoTokenizer
174174

175-
from benchmarks._models.llama.model import TransformerBlock
175+
from torchao._models.model import TransformerBlock
176176
from torchao.prototype.autoround.autoround_llm import (
177177
quantize_model_with_autoround_,
178178
)
@@ -242,7 +242,7 @@ def run_evaluation(
242242
with torch.no_grad():
243243
print("Running evaluation ...")
244244
# avoid circular imports
245-
from benchmarks._models._eval import TransformerEvalWrapper
245+
from torchao._models._eval import TransformerEvalWrapper
246246

247247
TransformerEvalWrapper(
248248
model=model.to(device),

benchmarks/_models/llama/generate.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,8 @@ def device_sync(device):
7272
wd = Path(__file__).parent.parent.resolve()
7373
sys.path.append(str(wd))
7474

75-
from benchmarks._models.llama.model import Transformer, prepare_inputs_for_model
76-
from benchmarks._models.llama.tokenizer import get_tokenizer
75+
from torchao._models.model import Transformer, prepare_inputs_for_model
76+
from torchao._models.tokenizer import get_tokenizer
7777

7878

7979
def multinomial_sample_one_no_sync(
@@ -476,7 +476,7 @@ def ffn_or_attn_only(mod, fqn):
476476
filter_fn=lambda x, *args: isinstance(x, torch.nn.Embedding),
477477
)
478478
elif quantization.startswith("awq"):
479-
from benchmarks._models._eval import TransformerEvalWrapper
479+
from torchao._models._eval import TransformerEvalWrapper
480480
from torchao.utils import TORCH_VERSION_AT_LEAST_2_3
481481

482482
if not TORCH_VERSION_AT_LEAST_2_3:
@@ -575,8 +575,8 @@ def ffn_or_attn_only(mod, fqn):
575575
model, float8_dynamic_activation_float8_weight(granularity=granularity)
576576
)
577577
elif "autoquant_v2" in quantization:
578-
from benchmarks._models.llama.model import prepare_inputs_for_model
579-
from benchmarks._models._eval import InputRecorder
578+
from torchao._models.model import prepare_inputs_for_model
579+
from torchao._models._eval import InputRecorder
580580
from torchao.prototype.quantization.autoquant_v2 import autoquant_v2
581581

582582
calibration_seq_length = 256
@@ -665,8 +665,8 @@ def ffn_or_attn_only(mod, fqn):
665665
# do autoquantization
666666
model.finalize_autoquant()
667667
elif "autoquant" in quantization:
668-
from benchmarks._models.llama.model import prepare_inputs_for_model
669-
from benchmarks._models._eval import InputRecorder
668+
from torchao._models.model import prepare_inputs_for_model
669+
from torchao._models._eval import InputRecorder
670670

671671
calibration_seq_length = 256
672672
inputs = (

benchmarks/_models/llama/perf_profile.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,8 @@
116116
import torch
117117
from torch.nn.attention import SDPBackend
118118

119-
from benchmarks._models.llama.model import Transformer
120-
from benchmarks._models.llama.tokenizer import get_tokenizer
119+
from torchao._models.model import Transformer
120+
from torchao._models.tokenizer import get_tokenizer
121121
from torchao.prototype.profiler import (
122122
CUDADeviceSpec,
123123
TransformerPerformanceCounter,

benchmarks/quantized_training/pretrain_llama2.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from torch.utils.checkpoint import checkpoint
2323
from tqdm import tqdm
2424

25-
from benchmarks._models.llama.model import (
25+
from torchao._models.model import (
2626
ModelArgs,
2727
RMSNorm,
2828
Transformer,

scripts/convert_hf_checkpoint.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
import torch
1515
from safetensors.torch import load_file as load_safetensors_file
1616

17-
from benchmarks._models.llama.model import ModelArgs
17+
from torchao._models.model import ModelArgs
1818

1919

2020
@torch.inference_mode()

test/prototype/test_spinquant.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import pytest
22
import torch
33

4-
from benchmarks._models.llama.model import Transformer
4+
from torchao._models.model import Transformer
55
from torchao.prototype.spinquant import apply_spinquant
66

77

test/quantization/test_gptq_mt.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
import torch.nn.functional as F
66
from torch.testing._internal.common_utils import run_tests
77

8-
from benchmarks._models.llama.model import Transformer, prepare_inputs_for_model
9-
from benchmarks._models.llama.tokenizer import get_tokenizer
8+
from torchao._models.model import Transformer, prepare_inputs_for_model
9+
from torchao._models.tokenizer import get_tokenizer
1010
from torchao.quantization.GPTQ_MT import Int4WeightOnlyGPTQQuantizer, MultiTensor
1111
from torchao.quantization.utils import _lm_eval_available
1212
from torchao.utils import is_fbcode

test/quantization/test_quant_api.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@
2222
from torch.testing._internal.common_utils import TestCase
2323

2424
from torchao import quantize_
25-
from benchmarks._models.llama.model import Transformer, prepare_inputs_for_model
26-
from benchmarks._models.llama.tokenizer import get_tokenizer
25+
from torchao._models.model import Transformer, prepare_inputs_for_model
26+
from torchao._models.tokenizer import get_tokenizer
2727
from torchao.dtypes import AffineQuantizedTensor
2828
from torchao.quantization import LinearActivationQuantizedTensor
2929
from torchao.quantization.quant_api import (
@@ -278,7 +278,7 @@ def test_8da4w_quantizer(self):
278278
# https://github.com/pytorch-labs/gpt-fast/blob/6253c6bb054e658d67566150f87329b87815ae63/scripts/convert_hf_checkpoint.py
279279
@unittest.skip("skipping until we get checkpoints for gpt-fast")
280280
def test_8da4w_gptq_quantizer(self):
281-
from benchmarks._models._eval import InputRecorder, TransformerEvalWrapper
281+
from torchao._models._eval import InputRecorder, TransformerEvalWrapper
282282
from torchao.quantization.GPTQ import Int8DynActInt4WeightGPTQQuantizer
283283

284284
# should be similar to TorchCompileDynamicQuantizer
@@ -348,7 +348,7 @@ def test_8da4w_gptq_quantizer(self):
348348
not TORCH_VERSION_AT_LEAST_2_4, "skipping when torch verion is 2.4 or lower"
349349
)
350350
def test_8da4w_quantizer_eval(self):
351-
from benchmarks._models._eval import TransformerEvalWrapper
351+
from torchao._models._eval import TransformerEvalWrapper
352352
from torchao.quantization.quant_api import Int8DynActInt4WeightQuantizer
353353

354354
precision = torch.bfloat16
@@ -384,7 +384,7 @@ def test_8da4w_quantizer_eval(self):
384384

385385
@unittest.skip("skipping until we get checkpoints for gpt-fast")
386386
def test_gptq_quantizer_int4_weight_only(self):
387-
from benchmarks._models._eval import (
387+
from torchao._models._eval import (
388388
MultiTensorInputRecorder,
389389
TransformerEvalWrapper,
390390
)
@@ -454,7 +454,7 @@ def test_gptq_quantizer_int4_weight_only(self):
454454

455455
@unittest.skip("skipping until we get checkpoints for gpt-fast")
456456
def test_quantizer_int4_weight_only(self):
457-
from benchmarks._models._eval import TransformerEvalWrapper
457+
from torchao._models._eval import TransformerEvalWrapper
458458
from torchao.quantization.GPTQ import Int4WeightOnlyQuantizer
459459

460460
precision = torch.bfloat16
@@ -492,7 +492,7 @@ def test_quantizer_int4_weight_only(self):
492492

493493
@unittest.skip("skipping until we get checkpoints for gpt-fast")
494494
def test_eval_wrapper(self):
495-
from benchmarks._models._eval import TransformerEvalWrapper
495+
from torchao._models._eval import TransformerEvalWrapper
496496

497497
precision = torch.bfloat16
498498
device = "cuda"
@@ -525,7 +525,7 @@ def test_eval_wrapper(self):
525525
# EVAL IS CURRENTLY BROKEN FOR LLAMA 3, VERY LOW ACCURACY
526526
@unittest.skip("skipping until we get checkpoints for gpt-fast")
527527
def test_eval_wrapper_llama3(self):
528-
from benchmarks._models._eval import TransformerEvalWrapper
528+
from torchao._models._eval import TransformerEvalWrapper
529529

530530
precision = torch.bfloat16
531531
device = "cuda"

test/test_ao_models.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import pytest
22
import torch
33

4-
from benchmarks._models.llama.model import Transformer
4+
from torchao._models.model import Transformer
55

66
_AVAILABLE_DEVICES = ["cpu"] + (["cuda"] if torch.cuda.is_available() else [])
77

torchao/_models/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)