diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_dynamic/README.md b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_dynamic/README.md index 03b91b484ef..a2033427b81 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_dynamic/README.md +++ b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_dynamic/README.md @@ -9,7 +9,6 @@ This example loads the LayoutLMv3 model and confirms its accuracy and speed based on [ ```shell pip install neural-compressor pip install -r requirements.txt -bash install_layoutlmft.sh ``` > Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment). diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_dynamic/data_utils.py b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_dynamic/data_utils.py new file mode 100644 index 00000000000..ef83f822b1f --- /dev/null +++ b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_dynamic/data_utils.py @@ -0,0 +1,179 @@ +from dataclasses import dataclass, field +from typing import Optional, Union + +import torch + +from detectron2.structures import ImageList +from detectron2.data.detection_utils import read_image +from detectron2.data.transforms import ResizeTransform, TransformList + +from transformers import PreTrainedTokenizerBase +from transformers.file_utils import PaddingStrategy + + +@dataclass +class DataCollatorForKeyValueExtraction: + """ + Data collator that will dynamically pad the inputs received, as well as the labels. + + Args: + tokenizer (:class:`~transformers.PreTrainedTokenizer` or :class:`~transformers.PreTrainedTokenizerFast`): + The tokenizer used for encoding the data. + padding (:obj:`bool`, :obj:`str` or :class:`~transformers.file_utils.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned sequences (according to the model's padding side and padding index) + among: + + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence is provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + max_length (:obj:`int`, `optional`): + Maximum length of the returned list and optionally padding length (see above). + pad_to_multiple_of (:obj:`int`, `optional`): + If set, will pad the sequence to a multiple of the provided value. + + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + label_pad_token_id (:obj:`int`, `optional`, defaults to -100): + The id to use when padding the labels (-100 will be automatically ignored by PyTorch loss functions).
+ """ + + tokenizer: PreTrainedTokenizerBase + padding: Union[bool, str, PaddingStrategy] = True + max_length: Optional[int] = None + pad_to_multiple_of: Optional[int] = None + label_pad_token_id: int = -100 + + def __call__(self, features): + label_name = "label" if "label" in features[0].keys() else "labels" + labels = [feature[label_name] for feature in features] if label_name in features[0].keys() else None + + has_image_input = "image" in features[0] + has_bbox_input = "bbox" in features[0] + if has_image_input: + image = ImageList.from_tensors([torch.tensor(feature["image"]) for feature in features], 32) + for feature in features: + del feature["image"] + batch = self.tokenizer.pad( + features, + padding=self.padding, + max_length=self.max_length, + pad_to_multiple_of=self.pad_to_multiple_of, + # Conversion to tensors will fail if we have labels as they are not of the same length yet. + return_tensors="pt" if labels is None else None, + ) + + if labels is None: + return batch + + sequence_length = torch.tensor(batch["input_ids"]).shape[1] + padding_side = self.tokenizer.padding_side + if padding_side == "right": + batch["labels"] = [label + [self.label_pad_token_id] * (sequence_length - len(label)) for label in labels] + if has_bbox_input: + batch["bbox"] = [bbox + [[0, 0, 0, 0]] * (sequence_length - len(bbox)) for bbox in batch["bbox"]] + else: + batch["labels"] = [[self.label_pad_token_id] * (sequence_length - len(label)) + label for label in labels] + if has_bbox_input: + batch["bbox"] = [[[0, 0, 0, 0]] * (sequence_length - len(bbox)) + bbox for bbox in batch["bbox"]] + + batch = {k: torch.tensor(v, dtype=torch.int64) if isinstance(v[0], list) else v for k, v in batch.items()} + if has_image_input: + batch["image"] = image + return batch + +@dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. + """ + + task_name: Optional[str] = field(default="ner", metadata={"help": "The name of the task (ner, pos...)."}) + dataset_name: Optional[str] = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + train_file: Optional[str] = field( + default=None, metadata={"help": "The input training data file (a csv or JSON file)."} + ) + validation_file: Optional[str] = field( + default=None, + metadata={"help": "An optional input evaluation data file to evaluate on (a csv or JSON file)."}, + ) + test_file: Optional[str] = field( + default=None, + metadata={"help": "An optional input test data file to predict on (a csv or JSON file)."}, + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + pad_to_max_length: bool = field( + default=True, + metadata={ + "help": "Whether to pad all samples to model maximum sentence length. " + "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " + "efficient on GPU but very bad for TPU." + }, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." 
+ }, + ) + max_val_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of validation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + label_all_tokens: bool = field( + default=False, + metadata={ + "help": "Whether to put the label for one word on all tokens of generated by that word or just on the " + "one (in which case the other tokens will have a padding index)." + }, + ) + return_entity_level_metrics: bool = field( + default=False, + metadata={"help": "Whether to return all the entity levels during evaluation or just the overall ones."}, + ) + + +@dataclass +class XFUNDataTrainingArguments(DataTrainingArguments): + lang: Optional[str] = field(default="en") + additional_langs: Optional[str] = field(default=None) + +def normalize_bbox(bbox, size): + return [ + int(1000 * bbox[0] / size[0]), + int(1000 * bbox[1] / size[1]), + int(1000 * bbox[2] / size[0]), + int(1000 * bbox[3] / size[1]), + ] + +def load_image(image_path): + image = read_image(image_path, format="BGR") + h = image.shape[0] + w = image.shape[1] + img_trans = TransformList([ResizeTransform(h=h, w=w, new_h=224, new_w=224)]) + image = torch.tensor(img_trans.apply_image(image).copy()).permute(2, 0, 1) # copy to make it writeable + return image, (w, h) \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_dynamic/funsd.py b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_dynamic/funsd.py new file mode 100644 index 00000000000..f91f4fd3dd8 --- /dev/null +++ b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_dynamic/funsd.py @@ -0,0 +1,116 @@ +# coding=utf-8 + +import json +import os + +import datasets + +from data_utils import load_image, normalize_bbox + + +logger = datasets.logging.get_logger(__name__) + + +_CITATION = """\ +@article{Jaume2019FUNSDAD, + title={FUNSD: A Dataset for Form Understanding in Noisy Scanned Documents}, + author={Guillaume Jaume and H. K. Ekenel and J. Thiran}, + journal={2019 International Conference on Document Analysis and Recognition Workshops (ICDARW)}, + year={2019}, + volume={2}, + pages={1-6} +} +""" + +_DESCRIPTION = """\ +https://guillaumejaume.github.io/FUNSD/ +""" + + +class FunsdConfig(datasets.BuilderConfig): + """BuilderConfig for FUNSD""" + + def __init__(self, **kwargs): + """BuilderConfig for FUNSD. + + Args: + **kwargs: keyword arguments forwarded to super. 
+ """ + super(FunsdConfig, self).__init__(**kwargs) + + +class Funsd(datasets.GeneratorBasedBuilder): + """Conll2003 dataset.""" + + BUILDER_CONFIGS = [ + FunsdConfig(name="funsd", version=datasets.Version("1.0.0"), description="FUNSD dataset"), + ] + + def _info(self): + return datasets.DatasetInfo( + description=_DESCRIPTION, + features=datasets.Features( + { + "id": datasets.Value("string"), + "tokens": datasets.Sequence(datasets.Value("string")), + "bboxes": datasets.Sequence(datasets.Sequence(datasets.Value("int64"))), + "ner_tags": datasets.Sequence( + datasets.features.ClassLabel( + names=["O", "B-HEADER", "I-HEADER", "B-QUESTION", "I-QUESTION", "B-ANSWER", "I-ANSWER"] + ) + ), + "image": datasets.Array3D(shape=(3, 224, 224), dtype="uint8"), + } + ), + supervised_keys=None, + homepage="https://guillaumejaume.github.io/FUNSD/", + citation=_CITATION, + ) + + def _split_generators(self, dl_manager): + """Returns SplitGenerators.""" + downloaded_file = dl_manager.download_and_extract("https://guillaumejaume.github.io/FUNSD/dataset.zip") + return [ + datasets.SplitGenerator( + name=datasets.Split.TRAIN, gen_kwargs={"filepath": f"{downloaded_file}/dataset/training_data/"} + ), + datasets.SplitGenerator( + name=datasets.Split.TEST, gen_kwargs={"filepath": f"{downloaded_file}/dataset/testing_data/"} + ), + ] + + def _generate_examples(self, filepath): + logger.info("⏳ Generating examples from = %s", filepath) + ann_dir = os.path.join(filepath, "annotations") + img_dir = os.path.join(filepath, "images") + for guid, file in enumerate(sorted(os.listdir(ann_dir))): + tokens = [] + bboxes = [] + ner_tags = [] + + file_path = os.path.join(ann_dir, file) + with open(file_path, "r", encoding="utf8") as f: + data = json.load(f) + image_path = os.path.join(img_dir, file) + image_path = image_path.replace("json", "png") + image, size = load_image(image_path) + for item in data["form"]: + words, label = item["words"], item["label"] + words = [w for w in words if w["text"].strip() != ""] + if len(words) == 0: + continue + if label == "other": + for w in words: + tokens.append(w["text"]) + ner_tags.append("O") + bboxes.append(normalize_bbox(w["box"], size)) + else: + tokens.append(words[0]["text"]) + ner_tags.append("B-" + label.upper()) + bboxes.append(normalize_bbox(words[0]["box"], size)) + for w in words[1:]: + tokens.append(w["text"]) + ner_tags.append("I-" + label.upper()) + bboxes.append(normalize_bbox(w["box"], size)) + + yield guid, {"id": str(guid), "tokens": tokens, "bboxes": bboxes, "ner_tags": ner_tags, "image": image} \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_dynamic/install_layoutlmft.sh b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_dynamic/install_layoutlmft.sh deleted file mode 100644 index 7fb2814537b..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_dynamic/install_layoutlmft.sh +++ /dev/null @@ -1,3 +0,0 @@ -git clone https://github.com/microsoft/unilm.git -cd unilm/layoutlmft -pip install -e . 
\ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_dynamic/main.py b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_dynamic/main.py index 85f32a67518..80074d4dd7e 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_dynamic/main.py +++ b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_dynamic/main.py @@ -10,11 +10,10 @@ import numpy as np from datasets import ClassLabel, load_dataset, load_metric -import layoutlmft.data.datasets.funsd +import funsd import transformers -from layoutlmft.data import DataCollatorForKeyValueExtraction -from layoutlmft.data.data_args import DataTrainingArguments -from layoutlmft.trainers import FunsdTrainer as Trainer +from data_utils import DataCollatorForKeyValueExtraction, DataTrainingArguments +from trainer import FunsdTrainer as Trainer from transformers import ( AutoConfig, AutoModelForTokenClassification, @@ -184,7 +183,7 @@ def main(): # Set seed before initializing model. set_seed(training_args.seed) - datasets = load_dataset(os.path.abspath(layoutlmft.data.datasets.funsd.__file__)) + datasets = load_dataset(os.path.abspath(funsd.__file__)) if training_args.do_train: column_names = datasets["train"].column_names features = datasets["train"].features diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_dynamic/requirements.txt b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_dynamic/requirements.txt index 505a5d4aa80..864fa3eaada 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_dynamic/requirements.txt +++ b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_dynamic/requirements.txt @@ -1,10 +1,11 @@ -datasets==1.6.2 -transformers==4.6 -huggingface-hub==0.0.8 -seqeval==1.2.2 -tensorboard==2.7.0 +accelerate +datasets +transformers +huggingface-hub +seqeval +tensorboard sentencepiece -timm==0.4.12 +timm Pillow einops textdistance @@ -17,4 +18,4 @@ onnx onnxruntime onnxruntime-extensions; python_version < '3.10' -f https://dl.fbaipublicfiles.com/detectron2/wheels/cpu/torch1.10/index.html -detectron2 +detectron2 \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_dynamic/trainer.py b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_dynamic/trainer.py new file mode 100644 index 00000000000..0a2f88390f8 --- /dev/null +++ b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_dynamic/trainer.py @@ -0,0 +1,21 @@ +from typing import Any, Dict, Union + +import torch + +from transformers import Trainer + + +class FunsdTrainer(Trainer): + def _prepare_inputs(self, inputs: Dict[str, Union[torch.Tensor, Any]]) -> Dict[str, Union[torch.Tensor, Any]]: + """ + Prepare :obj:`inputs` before feeding them to the model, converting them to tensors if they are not already and + handling potential state. 
+ """ + for k, v in inputs.items(): + if hasattr(v, "to") and hasattr(v, "device"): + inputs[k] = v.to(self.args.device) + + if self.args.past_index >= 0 and self._past is not None: + inputs["mems"] = self._past + + return inputs \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_static/README.md b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_static/README.md index 58232f6485e..fc05e9cd229 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_static/README.md +++ b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_static/README.md @@ -9,7 +9,6 @@ This example load LayoutLMv3 model and confirm its accuracy and speed based on [ ```shell pip install neural-compressor pip install -r requirements.txt -bash install_layoutlmft.sh ``` > Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment). diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_static/data_utils.py b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_static/data_utils.py new file mode 100644 index 00000000000..ef83f822b1f --- /dev/null +++ b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_static/data_utils.py @@ -0,0 +1,179 @@ +from dataclasses import dataclass, field +from typing import Optional, Union + +import torch + +from detectron2.structures import ImageList +from detectron2.data.detection_utils import read_image +from detectron2.data.transforms import ResizeTransform, TransformList + +from transformers import PreTrainedTokenizerBase +from transformers.file_utils import PaddingStrategy + + +@dataclass +class DataCollatorForKeyValueExtraction: + """ + Data collator that will dynamically pad the inputs received, as well as the labels. + + Args: + tokenizer (:class:`~transformers.PreTrainedTokenizer` or :class:`~transformers.PreTrainedTokenizerFast`): + The tokenizer used for encoding the data. + padding (:obj:`bool`, :obj:`str` or :class:`~transformers.file_utils.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned sequences (according to the model's padding side and padding index) + among: + + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + max_length (:obj:`int`, `optional`): + Maximum length of the returned list and optionally padding length (see above). + pad_to_multiple_of (:obj:`int`, `optional`): + If set will pad the sequence to a multiple of the provided value. + + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + label_pad_token_id (:obj:`int`, `optional`, defaults to -100): + The id to use when padding the labels (-100 will be automatically ignore by PyTorch loss functions). 
+ """ + + tokenizer: PreTrainedTokenizerBase + padding: Union[bool, str, PaddingStrategy] = True + max_length: Optional[int] = None + pad_to_multiple_of: Optional[int] = None + label_pad_token_id: int = -100 + + def __call__(self, features): + label_name = "label" if "label" in features[0].keys() else "labels" + labels = [feature[label_name] for feature in features] if label_name in features[0].keys() else None + + has_image_input = "image" in features[0] + has_bbox_input = "bbox" in features[0] + if has_image_input: + image = ImageList.from_tensors([torch.tensor(feature["image"]) for feature in features], 32) + for feature in features: + del feature["image"] + batch = self.tokenizer.pad( + features, + padding=self.padding, + max_length=self.max_length, + pad_to_multiple_of=self.pad_to_multiple_of, + # Conversion to tensors will fail if we have labels as they are not of the same length yet. + return_tensors="pt" if labels is None else None, + ) + + if labels is None: + return batch + + sequence_length = torch.tensor(batch["input_ids"]).shape[1] + padding_side = self.tokenizer.padding_side + if padding_side == "right": + batch["labels"] = [label + [self.label_pad_token_id] * (sequence_length - len(label)) for label in labels] + if has_bbox_input: + batch["bbox"] = [bbox + [[0, 0, 0, 0]] * (sequence_length - len(bbox)) for bbox in batch["bbox"]] + else: + batch["labels"] = [[self.label_pad_token_id] * (sequence_length - len(label)) + label for label in labels] + if has_bbox_input: + batch["bbox"] = [[[0, 0, 0, 0]] * (sequence_length - len(bbox)) + bbox for bbox in batch["bbox"]] + + batch = {k: torch.tensor(v, dtype=torch.int64) if isinstance(v[0], list) else v for k, v in batch.items()} + if has_image_input: + batch["image"] = image + return batch + +@dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. + """ + + task_name: Optional[str] = field(default="ner", metadata={"help": "The name of the task (ner, pos...)."}) + dataset_name: Optional[str] = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + train_file: Optional[str] = field( + default=None, metadata={"help": "The input training data file (a csv or JSON file)."} + ) + validation_file: Optional[str] = field( + default=None, + metadata={"help": "An optional input evaluation data file to evaluate on (a csv or JSON file)."}, + ) + test_file: Optional[str] = field( + default=None, + metadata={"help": "An optional input test data file to predict on (a csv or JSON file)."}, + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + pad_to_max_length: bool = field( + default=True, + metadata={ + "help": "Whether to pad all samples to model maximum sentence length. " + "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " + "efficient on GPU but very bad for TPU." + }, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." 
+ }, + ) + max_val_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of validation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + label_all_tokens: bool = field( + default=False, + metadata={ + "help": "Whether to put the label for one word on all tokens of generated by that word or just on the " + "one (in which case the other tokens will have a padding index)." + }, + ) + return_entity_level_metrics: bool = field( + default=False, + metadata={"help": "Whether to return all the entity levels during evaluation or just the overall ones."}, + ) + + +@dataclass +class XFUNDataTrainingArguments(DataTrainingArguments): + lang: Optional[str] = field(default="en") + additional_langs: Optional[str] = field(default=None) + +def normalize_bbox(bbox, size): + return [ + int(1000 * bbox[0] / size[0]), + int(1000 * bbox[1] / size[1]), + int(1000 * bbox[2] / size[0]), + int(1000 * bbox[3] / size[1]), + ] + +def load_image(image_path): + image = read_image(image_path, format="BGR") + h = image.shape[0] + w = image.shape[1] + img_trans = TransformList([ResizeTransform(h=h, w=w, new_h=224, new_w=224)]) + image = torch.tensor(img_trans.apply_image(image).copy()).permute(2, 0, 1) # copy to make it writeable + return image, (w, h) \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_static/funsd.py b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_static/funsd.py new file mode 100644 index 00000000000..f91f4fd3dd8 --- /dev/null +++ b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_static/funsd.py @@ -0,0 +1,116 @@ +# coding=utf-8 + +import json +import os + +import datasets + +from data_utils import load_image, normalize_bbox + + +logger = datasets.logging.get_logger(__name__) + + +_CITATION = """\ +@article{Jaume2019FUNSDAD, + title={FUNSD: A Dataset for Form Understanding in Noisy Scanned Documents}, + author={Guillaume Jaume and H. K. Ekenel and J. Thiran}, + journal={2019 International Conference on Document Analysis and Recognition Workshops (ICDARW)}, + year={2019}, + volume={2}, + pages={1-6} +} +""" + +_DESCRIPTION = """\ +https://guillaumejaume.github.io/FUNSD/ +""" + + +class FunsdConfig(datasets.BuilderConfig): + """BuilderConfig for FUNSD""" + + def __init__(self, **kwargs): + """BuilderConfig for FUNSD. + + Args: + **kwargs: keyword arguments forwarded to super. 
+ """ + super(FunsdConfig, self).__init__(**kwargs) + + +class Funsd(datasets.GeneratorBasedBuilder): + """Conll2003 dataset.""" + + BUILDER_CONFIGS = [ + FunsdConfig(name="funsd", version=datasets.Version("1.0.0"), description="FUNSD dataset"), + ] + + def _info(self): + return datasets.DatasetInfo( + description=_DESCRIPTION, + features=datasets.Features( + { + "id": datasets.Value("string"), + "tokens": datasets.Sequence(datasets.Value("string")), + "bboxes": datasets.Sequence(datasets.Sequence(datasets.Value("int64"))), + "ner_tags": datasets.Sequence( + datasets.features.ClassLabel( + names=["O", "B-HEADER", "I-HEADER", "B-QUESTION", "I-QUESTION", "B-ANSWER", "I-ANSWER"] + ) + ), + "image": datasets.Array3D(shape=(3, 224, 224), dtype="uint8"), + } + ), + supervised_keys=None, + homepage="https://guillaumejaume.github.io/FUNSD/", + citation=_CITATION, + ) + + def _split_generators(self, dl_manager): + """Returns SplitGenerators.""" + downloaded_file = dl_manager.download_and_extract("https://guillaumejaume.github.io/FUNSD/dataset.zip") + return [ + datasets.SplitGenerator( + name=datasets.Split.TRAIN, gen_kwargs={"filepath": f"{downloaded_file}/dataset/training_data/"} + ), + datasets.SplitGenerator( + name=datasets.Split.TEST, gen_kwargs={"filepath": f"{downloaded_file}/dataset/testing_data/"} + ), + ] + + def _generate_examples(self, filepath): + logger.info("⏳ Generating examples from = %s", filepath) + ann_dir = os.path.join(filepath, "annotations") + img_dir = os.path.join(filepath, "images") + for guid, file in enumerate(sorted(os.listdir(ann_dir))): + tokens = [] + bboxes = [] + ner_tags = [] + + file_path = os.path.join(ann_dir, file) + with open(file_path, "r", encoding="utf8") as f: + data = json.load(f) + image_path = os.path.join(img_dir, file) + image_path = image_path.replace("json", "png") + image, size = load_image(image_path) + for item in data["form"]: + words, label = item["words"], item["label"] + words = [w for w in words if w["text"].strip() != ""] + if len(words) == 0: + continue + if label == "other": + for w in words: + tokens.append(w["text"]) + ner_tags.append("O") + bboxes.append(normalize_bbox(w["box"], size)) + else: + tokens.append(words[0]["text"]) + ner_tags.append("B-" + label.upper()) + bboxes.append(normalize_bbox(words[0]["box"], size)) + for w in words[1:]: + tokens.append(w["text"]) + ner_tags.append("I-" + label.upper()) + bboxes.append(normalize_bbox(w["box"], size)) + + yield guid, {"id": str(guid), "tokens": tokens, "bboxes": bboxes, "ner_tags": ner_tags, "image": image} \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_static/install_layoutlmft.sh b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_static/install_layoutlmft.sh deleted file mode 100644 index 7fb2814537b..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_static/install_layoutlmft.sh +++ /dev/null @@ -1,3 +0,0 @@ -git clone https://github.com/microsoft/unilm.git -cd unilm/layoutlmft -pip install -e . 
\ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_static/main.py b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_static/main.py index 26d2517d623..4bd4b63d870 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_static/main.py +++ b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_static/main.py @@ -10,11 +10,10 @@ import numpy as np from datasets import ClassLabel, load_dataset, load_metric -import layoutlmft.data.datasets.funsd +import funsd import transformers -from layoutlmft.data import DataCollatorForKeyValueExtraction -from layoutlmft.data.data_args import DataTrainingArguments -from layoutlmft.trainers import FunsdTrainer as Trainer +from data_utils import DataCollatorForKeyValueExtraction, DataTrainingArguments +from trainer import FunsdTrainer as Trainer from transformers import ( AutoConfig, AutoModelForTokenClassification, @@ -188,7 +187,7 @@ def main(): # Set seed before initializing model. set_seed(training_args.seed) - datasets = load_dataset(os.path.abspath(layoutlmft.data.datasets.funsd.__file__)) + datasets = load_dataset(os.path.abspath(funsd.__file__)) if training_args.do_train: column_names = datasets["train"].column_names features = datasets["train"].features diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_static/requirements.txt b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_static/requirements.txt index 505a5d4aa80..864fa3eaada 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_static/requirements.txt +++ b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_static/requirements.txt @@ -1,10 +1,11 @@ -datasets==1.6.2 -transformers==4.6 -huggingface-hub==0.0.8 -seqeval==1.2.2 -tensorboard==2.7.0 +accelerate +datasets +transformers +huggingface-hub +seqeval +tensorboard sentencepiece -timm==0.4.12 +timm Pillow einops textdistance @@ -17,4 +18,4 @@ onnx onnxruntime onnxruntime-extensions; python_version < '3.10' -f https://dl.fbaipublicfiles.com/detectron2/wheels/cpu/torch1.10/index.html -detectron2 +detectron2 \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_static/trainer.py b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_static/trainer.py new file mode 100644 index 00000000000..0a2f88390f8 --- /dev/null +++ b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_static/trainer.py @@ -0,0 +1,21 @@ +from typing import Any, Dict, Union + +import torch + +from transformers import Trainer + + +class FunsdTrainer(Trainer): + def _prepare_inputs(self, inputs: Dict[str, Union[torch.Tensor, Any]]) -> Dict[str, Union[torch.Tensor, Any]]: + """ + Prepare :obj:`inputs` before feeding them to the model, converting them to tensors if they are not already and + handling potential state. 
+ """ + for k, v in inputs.items(): + if hasattr(v, "to") and hasattr(v, "device"): + inputs[k] = v.to(self.args.device) + + if self.args.past_index >= 0 and self._past is not None: + inputs["mems"] = self._past + + return inputs \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/README.md b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/README.md index b719d09e0b6..ee3c232b1af 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/README.md +++ b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/README.md @@ -9,7 +9,6 @@ This example load LayoutLMv3 model and confirm its accuracy and speed based on [ ```shell pip install neural-compressor pip install -r requirements.txt -bash install_layoutlmft.sh ``` > Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment). diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/funsd.py b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/funsd.py new file mode 100644 index 00000000000..dbadb70ba4c --- /dev/null +++ b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/funsd.py @@ -0,0 +1,136 @@ +# coding=utf-8 +''' +Reference: https://huggingface.co/datasets/nielsr/funsd/blob/main/funsd.py +''' +import json +import os + +import datasets + +from image_utils import load_image, normalize_bbox + + +logger = datasets.logging.get_logger(__name__) + + +_CITATION = """\ +@article{Jaume2019FUNSDAD, + title={FUNSD: A Dataset for Form Understanding in Noisy Scanned Documents}, + author={Guillaume Jaume and H. K. Ekenel and J. Thiran}, + journal={2019 International Conference on Document Analysis and Recognition Workshops (ICDARW)}, + year={2019}, + volume={2}, + pages={1-6} +} +""" + +_DESCRIPTION = """\ +https://guillaumejaume.github.io/FUNSD/ +""" + + +class FunsdConfig(datasets.BuilderConfig): + """BuilderConfig for FUNSD""" + + def __init__(self, **kwargs): + """BuilderConfig for FUNSD. + + Args: + **kwargs: keyword arguments forwarded to super. 
+ """ + super(FunsdConfig, self).__init__(**kwargs) + + +class Funsd(datasets.GeneratorBasedBuilder): + """Conll2003 dataset.""" + + BUILDER_CONFIGS = [ + FunsdConfig(name="funsd", version=datasets.Version("1.0.0"), description="FUNSD dataset"), + ] + + def _info(self): + return datasets.DatasetInfo( + description=_DESCRIPTION, + features=datasets.Features( + { + "id": datasets.Value("string"), + "tokens": datasets.Sequence(datasets.Value("string")), + "bboxes": datasets.Sequence(datasets.Sequence(datasets.Value("int64"))), + "ner_tags": datasets.Sequence( + datasets.features.ClassLabel( + names=["O", "B-HEADER", "I-HEADER", "B-QUESTION", "I-QUESTION", "B-ANSWER", "I-ANSWER"] + ) + ), + "image": datasets.Array3D(shape=(3, 224, 224), dtype="uint8"), + "image_path": datasets.Value("string"), + } + ), + supervised_keys=None, + homepage="https://guillaumejaume.github.io/FUNSD/", + citation=_CITATION, + ) + + def _split_generators(self, dl_manager): + """Returns SplitGenerators.""" + downloaded_file = dl_manager.download_and_extract("https://guillaumejaume.github.io/FUNSD/dataset.zip") + return [ + datasets.SplitGenerator( + name=datasets.Split.TRAIN, gen_kwargs={"filepath": f"{downloaded_file}/dataset/training_data/"} + ), + datasets.SplitGenerator( + name=datasets.Split.TEST, gen_kwargs={"filepath": f"{downloaded_file}/dataset/testing_data/"} + ), + ] + + def get_line_bbox(self, bboxs): + x = [bboxs[i][j] for i in range(len(bboxs)) for j in range(0, len(bboxs[i]), 2)] + y = [bboxs[i][j] for i in range(len(bboxs)) for j in range(1, len(bboxs[i]), 2)] + + x0, y0, x1, y1 = min(x), min(y), max(x), max(y) + + assert x1 >= x0 and y1 >= y0 + bbox = [[x0, y0, x1, y1] for _ in range(len(bboxs))] + return bbox + + def _generate_examples(self, filepath): + logger.info("⏳ Generating examples from = %s", filepath) + ann_dir = os.path.join(filepath, "annotations") + img_dir = os.path.join(filepath, "images") + for guid, file in enumerate(sorted(os.listdir(ann_dir))): + tokens = [] + bboxes = [] + ner_tags = [] + + file_path = os.path.join(ann_dir, file) + with open(file_path, "r", encoding="utf8") as f: + data = json.load(f) + image_path = os.path.join(img_dir, file) + image_path = image_path.replace("json", "png") + image, size = load_image(image_path) + for item in data["form"]: + cur_line_bboxes = [] + words, label = item["words"], item["label"] + words = [w for w in words if w["text"].strip() != ""] + if len(words) == 0: + continue + if label == "other": + for w in words: + tokens.append(w["text"]) + ner_tags.append("O") + cur_line_bboxes.append(normalize_bbox(w["box"], size)) + else: + tokens.append(words[0]["text"]) + ner_tags.append("B-" + label.upper()) + cur_line_bboxes.append(normalize_bbox(words[0]["box"], size)) + for w in words[1:]: + tokens.append(w["text"]) + ner_tags.append("I-" + label.upper()) + cur_line_bboxes.append(normalize_bbox(w["box"], size)) + # by default: --segment_level_layout 1 + # if do not want to use segment_level_layout, comment the following line + cur_line_bboxes = self.get_line_bbox(cur_line_bboxes) + # box = normalize_bbox(item["box"], size) + # cur_line_bboxes = [box for _ in range(len(words))] + bboxes.extend(cur_line_bboxes) + yield guid, {"id": str(guid), "tokens": tokens, "bboxes": bboxes, "ner_tags": ner_tags, + "image": image, "image_path": image_path} \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/image_utils.py 
b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/image_utils.py new file mode 100644 index 00000000000..beeeb5b6ba9 --- /dev/null +++ b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/image_utils.py @@ -0,0 +1,284 @@ +import torchvision.transforms.functional as F +import warnings +import math +import random +import numpy as np +from PIL import Image +import torch + +from detectron2.data.detection_utils import read_image +from detectron2.data.transforms import ResizeTransform, TransformList + +def normalize_bbox(bbox, size): + return [ + int(1000 * bbox[0] / size[0]), + int(1000 * bbox[1] / size[1]), + int(1000 * bbox[2] / size[0]), + int(1000 * bbox[3] / size[1]), + ] + + +def load_image(image_path): + image = read_image(image_path, format="BGR") + h = image.shape[0] + w = image.shape[1] + img_trans = TransformList([ResizeTransform(h=h, w=w, new_h=224, new_w=224)]) + image = torch.tensor(img_trans.apply_image(image).copy()).permute(2, 0, 1) # copy to make it writeable + return image, (w, h) + + +def crop(image, i, j, h, w, boxes=None): + cropped_image = F.crop(image, i, j, h, w) + + if boxes is not None: + # Currently we cannot use this case since when some boxes is out of the cropped image, + # it may be better to drop out these boxes along with their text input (instead of min or clamp) + # which haven't been implemented here + max_size = torch.as_tensor([w, h], dtype=torch.float32) + cropped_boxes = torch.as_tensor(boxes) - torch.as_tensor([j, i, j, i]) + cropped_boxes = torch.min(cropped_boxes.reshape(-1, 2, 2), max_size) + cropped_boxes = cropped_boxes.clamp(min=0) + boxes = cropped_boxes.reshape(-1, 4) + + return cropped_image, boxes + + +def resize(image, size, interpolation, boxes=None): + # It seems that we do not need to resize boxes here, since the boxes will be resized to 1000x1000 finally, + # which is compatible with a square image size of 224x224 + rescaled_image = F.resize(image, size, interpolation) + + if boxes is None: + return rescaled_image, None + + ratios = tuple(float(s) / float(s_orig) for s, s_orig in zip(rescaled_image.size, image.size)) + ratio_width, ratio_height = ratios + + # boxes = boxes.copy() + scaled_boxes = boxes * torch.as_tensor([ratio_width, ratio_height, ratio_width, ratio_height]) + + return rescaled_image, scaled_boxes + + +def clamp(num, min_value, max_value): + return max(min(num, max_value), min_value) + + +def get_bb(bb, page_size): + bbs = [float(j) for j in bb] + xs, ys = [], [] + for i, b in enumerate(bbs): + if i % 2 == 0: + xs.append(b) + else: + ys.append(b) + (width, height) = page_size + return_bb = [ + clamp(min(xs), 0, width - 1), + clamp(min(ys), 0, height - 1), + clamp(max(xs), 0, width - 1), + clamp(max(ys), 0, height - 1), + ] + return_bb = [ + int(1000 * return_bb[0] / width), + int(1000 * return_bb[1] / height), + int(1000 * return_bb[2] / width), + int(1000 * return_bb[3] / height), + ] + return return_bb + + +class ToNumpy: + + def __call__(self, pil_img): + np_img = np.array(pil_img, dtype=np.uint8) + if np_img.ndim < 3: + np_img = np.expand_dims(np_img, axis=-1) + np_img = np.rollaxis(np_img, 2) # HWC to CHW + return np_img + + +class ToTensor: + + def __init__(self, dtype=torch.float32): + self.dtype = dtype + + def __call__(self, pil_img): + np_img = np.array(pil_img, dtype=np.uint8) + if np_img.ndim < 3: + np_img = np.expand_dims(np_img, axis=-1) + np_img = np.rollaxis(np_img, 2) # HWC to CHW + return 
torch.from_numpy(np_img).to(dtype=self.dtype) + + +_pil_interpolation_to_str = { + F.InterpolationMode.NEAREST: 'F.InterpolationMode.NEAREST', + F.InterpolationMode.BILINEAR: 'F.InterpolationMode.BILINEAR', + F.InterpolationMode.BICUBIC: 'F.InterpolationMode.BICUBIC', + F.InterpolationMode.LANCZOS: 'F.InterpolationMode.LANCZOS', + F.InterpolationMode.HAMMING: 'F.InterpolationMode.HAMMING', + F.InterpolationMode.BOX: 'F.InterpolationMode.BOX', +} + + +def _pil_interp(method): + if method == 'bicubic': + return F.InterpolationMode.BICUBIC + elif method == 'lanczos': + return F.InterpolationMode.LANCZOS + elif method == 'hamming': + return F.InterpolationMode.HAMMING + else: + # default bilinear, do we want to allow nearest? + return F.InterpolationMode.BILINEAR + + +class Compose: + """Composes several transforms together. This transform does not support torchscript. + Please, see the note below. + + Args: + transforms (list of ``Transform`` objects): list of transforms to compose. + + Example: + >>> transforms.Compose([ + >>> transforms.CenterCrop(10), + >>> transforms.PILToTensor(), + >>> transforms.ConvertImageDtype(torch.float), + >>> ]) + + .. note:: + In order to script the transformations, please use ``torch.nn.Sequential`` as below. + + >>> transforms = torch.nn.Sequential( + >>> transforms.CenterCrop(10), + >>> transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), + >>> ) + >>> scripted_transforms = torch.jit.script(transforms) + + Make sure to use only scriptable transformations, i.e. that work with ``torch.Tensor``, does not require + `lambda` functions or ``PIL.Image``. + + """ + + def __init__(self, transforms): + self.transforms = transforms + + def __call__(self, img, augmentation=False, box=None): + for t in self.transforms: + img = t(img, augmentation, box) + return img + + +class RandomResizedCropAndInterpolationWithTwoPic: + """Crop the given PIL Image to random size and aspect ratio with random interpolation. + A crop of random size (default: of 0.08 to 1.0) of the original size and a random + aspect ratio (default: of 3/4 to 4/3) of the original aspect ratio is made. This crop + is finally resized to given size. + This is popularly used to train the Inception networks. + Args: + size: expected output size of each edge + scale: range of size of the origin size cropped + ratio: range of aspect ratio of the origin aspect ratio cropped + interpolation: Default: PIL.Image.BILINEAR + """ + + def __init__(self, size, second_size=None, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), + interpolation='bilinear', second_interpolation='lanczos'): + if isinstance(size, tuple): + self.size = size + else: + self.size = (size, size) + if second_size is not None: + if isinstance(second_size, tuple): + self.second_size = second_size + else: + self.second_size = (second_size, second_size) + else: + self.second_size = None + if (scale[0] > scale[1]) or (ratio[0] > ratio[1]): + warnings.warn("range should be of kind (min, max)") + + self.interpolation = _pil_interp(interpolation) + self.second_interpolation = _pil_interp(second_interpolation) + self.scale = scale + self.ratio = ratio + + @staticmethod + def get_params(img, scale, ratio): + """Get parameters for ``crop`` for a random sized crop. + Args: + img (PIL Image): Image to be cropped. + scale (tuple): range of size of the origin size cropped + ratio (tuple): range of aspect ratio of the origin aspect ratio cropped + Returns: + tuple: params (i, j, h, w) to be passed to ``crop`` for a random + sized crop. 
+ """ + area = img.size[0] * img.size[1] + + for attempt in range(10): + target_area = random.uniform(*scale) * area + log_ratio = (math.log(ratio[0]), math.log(ratio[1])) + aspect_ratio = math.exp(random.uniform(*log_ratio)) + + w = int(round(math.sqrt(target_area * aspect_ratio))) + h = int(round(math.sqrt(target_area / aspect_ratio))) + + if w <= img.size[0] and h <= img.size[1]: + i = random.randint(0, img.size[1] - h) + j = random.randint(0, img.size[0] - w) + return i, j, h, w + + # Fallback to central crop + in_ratio = img.size[0] / img.size[1] + if in_ratio < min(ratio): + w = img.size[0] + h = int(round(w / min(ratio))) + elif in_ratio > max(ratio): + h = img.size[1] + w = int(round(h * max(ratio))) + else: # whole image + w = img.size[0] + h = img.size[1] + i = (img.size[1] - h) // 2 + j = (img.size[0] - w) // 2 + return i, j, h, w + + def __call__(self, img, augmentation=False, box=None): + """ + Args: + img (PIL Image): Image to be cropped and resized. + Returns: + PIL Image: Randomly cropped and resized image. + """ + if augmentation: + i, j, h, w = self.get_params(img, self.scale, self.ratio) + img = F.crop(img, i, j, h, w) + # img, box = crop(img, i, j, h, w, box) + img = F.resize(img, self.size, self.interpolation) + second_img = F.resize(img, self.second_size, self.second_interpolation) \ + if self.second_size is not None else None + return img, second_img + + def __repr__(self): + if isinstance(self.interpolation, (tuple, list)): + interpolate_str = ' '.join([_pil_interpolation_to_str[x] for x in self.interpolation]) + else: + interpolate_str = _pil_interpolation_to_str[self.interpolation] + format_string = self.__class__.__name__ + '(size={0}'.format(self.size) + format_string += ', scale={0}'.format(tuple(round(s, 4) for s in self.scale)) + format_string += ', ratio={0}'.format(tuple(round(r, 4) for r in self.ratio)) + format_string += ', interpolation={0}'.format(interpolate_str) + if self.second_size is not None: + format_string += ', second_size={0}'.format(self.second_size) + format_string += ', second_interpolation={0}'.format(_pil_interpolation_to_str[self.second_interpolation]) + format_string += ')' + return format_string + + +def pil_loader(path: str) -> Image.Image: + # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835) + with open(path, 'rb') as f: + img = Image.open(f) + return img.convert('RGB') \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/install_layoutlmft.sh b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/install_layoutlmft.sh deleted file mode 100644 index fd29421017d..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/install_layoutlmft.sh +++ /dev/null @@ -1,3 +0,0 @@ -git clone https://github.com/microsoft/unilm.git -cd unilm/layoutlmv3 -pip install -e . 
\ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/main.py b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/main.py index fee932cf5ca..d300f3e2fdc 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/main.py +++ b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/main.py @@ -10,8 +10,6 @@ from datasets import ClassLabel, load_dataset, load_metric import transformers - -from layoutlmft.data import DataCollatorForKeyValueExtraction from transformers import ( AutoConfig, AutoModelForTokenClassification, @@ -28,7 +26,7 @@ # Will error if the minimal version of Transformers is not installed. Remove at your own risks. check_min_version("4.5.0") -from layoutlmft.data.image_utils import RandomResizedCropAndInterpolationWithTwoPic, pil_loader, Compose +from image_utils import RandomResizedCropAndInterpolationWithTwoPic, pil_loader, Compose from timm.data.constants import \ IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD @@ -263,11 +261,8 @@ def main(): if data_args.dataset_name == 'funsd': # datasets = load_dataset("nielsr/funsd") - import layoutlmft.data.funsd - datasets = load_dataset(os.path.abspath(layoutlmft.data.funsd.__file__), cache_dir=model_args.cache_dir) - elif data_args.dataset_name == 'cord': - import layoutlmft.data.cord - datasets = load_dataset(os.path.abspath(layoutlmft.data.cord.__file__), cache_dir=model_args.cache_dir) + import funsd + datasets = load_dataset(os.path.abspath(funsd.__file__), cache_dir=model_args.cache_dir) else: raise NotImplementedError() @@ -275,6 +270,7 @@ def main(): features = datasets["test"].features text_column_name = "words" if "words" in column_names else "tokens" + boxes_column_name = "bboxes" label_column_name = ( f"{data_args.task_name}_tags" if f"{data_args.task_name}_tags" in column_names else column_names[1] @@ -362,8 +358,7 @@ def tokenize_and_align_labels(examples, augmentation=False): padding=False, truncation=True, return_overflowing_tokens=True, - # We use this argument because the texts in our dataset are lists of words (with a label for each word). 
- is_split_into_words=True, + boxes=examples[boxes_column_name], ) labels = [] diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/requirements.txt b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/requirements.txt index 92ae07d8600..0f7947499f6 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/requirements.txt +++ b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/requirements.txt @@ -1,10 +1,10 @@ +accelerate datasets -transformers==4.12.5 -seqeval==1.2.2 -tensorboard==2.7.0 -seqeval==1.2.2 +transformers +seqeval +tensorboard sentencepiece -timm==0.4.12 +timm Pillow einops textdistance diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/README.md b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/README.md index fbc132a921a..3e05e57d35a 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/README.md +++ b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/README.md @@ -9,7 +9,6 @@ This example load LayoutLMv3 model and confirm its accuracy and speed based on [ ```shell pip install neural-compressor pip install -r requirements.txt -bash install_layoutlmft.sh ``` > Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment). diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/funsd.py b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/funsd.py new file mode 100644 index 00000000000..dbadb70ba4c --- /dev/null +++ b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/funsd.py @@ -0,0 +1,136 @@ +# coding=utf-8 +''' +Reference: https://huggingface.co/datasets/nielsr/funsd/blob/main/funsd.py +''' +import json +import os + +import datasets + +from image_utils import load_image, normalize_bbox + + +logger = datasets.logging.get_logger(__name__) + + +_CITATION = """\ +@article{Jaume2019FUNSDAD, + title={FUNSD: A Dataset for Form Understanding in Noisy Scanned Documents}, + author={Guillaume Jaume and H. K. Ekenel and J. Thiran}, + journal={2019 International Conference on Document Analysis and Recognition Workshops (ICDARW)}, + year={2019}, + volume={2}, + pages={1-6} +} +""" + +_DESCRIPTION = """\ +https://guillaumejaume.github.io/FUNSD/ +""" + + +class FunsdConfig(datasets.BuilderConfig): + """BuilderConfig for FUNSD""" + + def __init__(self, **kwargs): + """BuilderConfig for FUNSD. + + Args: + **kwargs: keyword arguments forwarded to super. 
+ """ + super(FunsdConfig, self).__init__(**kwargs) + + +class Funsd(datasets.GeneratorBasedBuilder): + """Conll2003 dataset.""" + + BUILDER_CONFIGS = [ + FunsdConfig(name="funsd", version=datasets.Version("1.0.0"), description="FUNSD dataset"), + ] + + def _info(self): + return datasets.DatasetInfo( + description=_DESCRIPTION, + features=datasets.Features( + { + "id": datasets.Value("string"), + "tokens": datasets.Sequence(datasets.Value("string")), + "bboxes": datasets.Sequence(datasets.Sequence(datasets.Value("int64"))), + "ner_tags": datasets.Sequence( + datasets.features.ClassLabel( + names=["O", "B-HEADER", "I-HEADER", "B-QUESTION", "I-QUESTION", "B-ANSWER", "I-ANSWER"] + ) + ), + "image": datasets.Array3D(shape=(3, 224, 224), dtype="uint8"), + "image_path": datasets.Value("string"), + } + ), + supervised_keys=None, + homepage="https://guillaumejaume.github.io/FUNSD/", + citation=_CITATION, + ) + + def _split_generators(self, dl_manager): + """Returns SplitGenerators.""" + downloaded_file = dl_manager.download_and_extract("https://guillaumejaume.github.io/FUNSD/dataset.zip") + return [ + datasets.SplitGenerator( + name=datasets.Split.TRAIN, gen_kwargs={"filepath": f"{downloaded_file}/dataset/training_data/"} + ), + datasets.SplitGenerator( + name=datasets.Split.TEST, gen_kwargs={"filepath": f"{downloaded_file}/dataset/testing_data/"} + ), + ] + + def get_line_bbox(self, bboxs): + x = [bboxs[i][j] for i in range(len(bboxs)) for j in range(0, len(bboxs[i]), 2)] + y = [bboxs[i][j] for i in range(len(bboxs)) for j in range(1, len(bboxs[i]), 2)] + + x0, y0, x1, y1 = min(x), min(y), max(x), max(y) + + assert x1 >= x0 and y1 >= y0 + bbox = [[x0, y0, x1, y1] for _ in range(len(bboxs))] + return bbox + + def _generate_examples(self, filepath): + logger.info("⏳ Generating examples from = %s", filepath) + ann_dir = os.path.join(filepath, "annotations") + img_dir = os.path.join(filepath, "images") + for guid, file in enumerate(sorted(os.listdir(ann_dir))): + tokens = [] + bboxes = [] + ner_tags = [] + + file_path = os.path.join(ann_dir, file) + with open(file_path, "r", encoding="utf8") as f: + data = json.load(f) + image_path = os.path.join(img_dir, file) + image_path = image_path.replace("json", "png") + image, size = load_image(image_path) + for item in data["form"]: + cur_line_bboxes = [] + words, label = item["words"], item["label"] + words = [w for w in words if w["text"].strip() != ""] + if len(words) == 0: + continue + if label == "other": + for w in words: + tokens.append(w["text"]) + ner_tags.append("O") + cur_line_bboxes.append(normalize_bbox(w["box"], size)) + else: + tokens.append(words[0]["text"]) + ner_tags.append("B-" + label.upper()) + cur_line_bboxes.append(normalize_bbox(words[0]["box"], size)) + for w in words[1:]: + tokens.append(w["text"]) + ner_tags.append("I-" + label.upper()) + cur_line_bboxes.append(normalize_bbox(w["box"], size)) + # by default: --segment_level_layout 1 + # if do not want to use segment_level_layout, comment the following line + cur_line_bboxes = self.get_line_bbox(cur_line_bboxes) + # box = normalize_bbox(item["box"], size) + # cur_line_bboxes = [box for _ in range(len(words))] + bboxes.extend(cur_line_bboxes) + yield guid, {"id": str(guid), "tokens": tokens, "bboxes": bboxes, "ner_tags": ner_tags, + "image": image, "image_path": image_path} \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/image_utils.py 
b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/image_utils.py new file mode 100644 index 00000000000..beeeb5b6ba9 --- /dev/null +++ b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/image_utils.py @@ -0,0 +1,284 @@ +import torchvision.transforms.functional as F +import warnings +import math +import random +import numpy as np +from PIL import Image +import torch + +from detectron2.data.detection_utils import read_image +from detectron2.data.transforms import ResizeTransform, TransformList + +def normalize_bbox(bbox, size): + return [ + int(1000 * bbox[0] / size[0]), + int(1000 * bbox[1] / size[1]), + int(1000 * bbox[2] / size[0]), + int(1000 * bbox[3] / size[1]), + ] + + +def load_image(image_path): + image = read_image(image_path, format="BGR") + h = image.shape[0] + w = image.shape[1] + img_trans = TransformList([ResizeTransform(h=h, w=w, new_h=224, new_w=224)]) + image = torch.tensor(img_trans.apply_image(image).copy()).permute(2, 0, 1) # copy to make it writeable + return image, (w, h) + + +def crop(image, i, j, h, w, boxes=None): + cropped_image = F.crop(image, i, j, h, w) + + if boxes is not None: + # Currently we cannot use this case since when some boxes is out of the cropped image, + # it may be better to drop out these boxes along with their text input (instead of min or clamp) + # which haven't been implemented here + max_size = torch.as_tensor([w, h], dtype=torch.float32) + cropped_boxes = torch.as_tensor(boxes) - torch.as_tensor([j, i, j, i]) + cropped_boxes = torch.min(cropped_boxes.reshape(-1, 2, 2), max_size) + cropped_boxes = cropped_boxes.clamp(min=0) + boxes = cropped_boxes.reshape(-1, 4) + + return cropped_image, boxes + + +def resize(image, size, interpolation, boxes=None): + # It seems that we do not need to resize boxes here, since the boxes will be resized to 1000x1000 finally, + # which is compatible with a square image size of 224x224 + rescaled_image = F.resize(image, size, interpolation) + + if boxes is None: + return rescaled_image, None + + ratios = tuple(float(s) / float(s_orig) for s, s_orig in zip(rescaled_image.size, image.size)) + ratio_width, ratio_height = ratios + + # boxes = boxes.copy() + scaled_boxes = boxes * torch.as_tensor([ratio_width, ratio_height, ratio_width, ratio_height]) + + return rescaled_image, scaled_boxes + + +def clamp(num, min_value, max_value): + return max(min(num, max_value), min_value) + + +def get_bb(bb, page_size): + bbs = [float(j) for j in bb] + xs, ys = [], [] + for i, b in enumerate(bbs): + if i % 2 == 0: + xs.append(b) + else: + ys.append(b) + (width, height) = page_size + return_bb = [ + clamp(min(xs), 0, width - 1), + clamp(min(ys), 0, height - 1), + clamp(max(xs), 0, width - 1), + clamp(max(ys), 0, height - 1), + ] + return_bb = [ + int(1000 * return_bb[0] / width), + int(1000 * return_bb[1] / height), + int(1000 * return_bb[2] / width), + int(1000 * return_bb[3] / height), + ] + return return_bb + + +class ToNumpy: + + def __call__(self, pil_img): + np_img = np.array(pil_img, dtype=np.uint8) + if np_img.ndim < 3: + np_img = np.expand_dims(np_img, axis=-1) + np_img = np.rollaxis(np_img, 2) # HWC to CHW + return np_img + + +class ToTensor: + + def __init__(self, dtype=torch.float32): + self.dtype = dtype + + def __call__(self, pil_img): + np_img = np.array(pil_img, dtype=np.uint8) + if np_img.ndim < 3: + np_img = np.expand_dims(np_img, axis=-1) + np_img = np.rollaxis(np_img, 2) # HWC to CHW + return 
+
+
+_pil_interpolation_to_str = {
+    F.InterpolationMode.NEAREST: 'F.InterpolationMode.NEAREST',
+    F.InterpolationMode.BILINEAR: 'F.InterpolationMode.BILINEAR',
+    F.InterpolationMode.BICUBIC: 'F.InterpolationMode.BICUBIC',
+    F.InterpolationMode.LANCZOS: 'F.InterpolationMode.LANCZOS',
+    F.InterpolationMode.HAMMING: 'F.InterpolationMode.HAMMING',
+    F.InterpolationMode.BOX: 'F.InterpolationMode.BOX',
+}
+
+
+def _pil_interp(method):
+    if method == 'bicubic':
+        return F.InterpolationMode.BICUBIC
+    elif method == 'lanczos':
+        return F.InterpolationMode.LANCZOS
+    elif method == 'hamming':
+        return F.InterpolationMode.HAMMING
+    else:
+        # default bilinear, do we want to allow nearest?
+        return F.InterpolationMode.BILINEAR
+
+
+class Compose:
+    """Composes several transforms together. This transform does not support torchscript.
+    Please, see the note below.
+
+    Args:
+        transforms (list of ``Transform`` objects): list of transforms to compose.
+
+    Example:
+        >>> transforms.Compose([
+        >>>     transforms.CenterCrop(10),
+        >>>     transforms.PILToTensor(),
+        >>>     transforms.ConvertImageDtype(torch.float),
+        >>> ])
+
+    .. note::
+        In order to script the transformations, please use ``torch.nn.Sequential`` as below.
+
+        >>> transforms = torch.nn.Sequential(
+        >>>     transforms.CenterCrop(10),
+        >>>     transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
+        >>> )
+        >>> scripted_transforms = torch.jit.script(transforms)
+
+        Make sure to use only scriptable transformations, i.e. ones that work with ``torch.Tensor`` and do not
+        require `lambda` functions or ``PIL.Image``.
+
+    """
+
+    def __init__(self, transforms):
+        self.transforms = transforms
+
+    def __call__(self, img, augmentation=False, box=None):
+        for t in self.transforms:
+            img = t(img, augmentation, box)
+        return img
+
+
+class RandomResizedCropAndInterpolationWithTwoPic:
+    """Crop the given PIL Image to random size and aspect ratio with random interpolation.
+    A crop of random size (default: of 0.08 to 1.0) of the original size and a random
+    aspect ratio (default: of 3/4 to 4/3) of the original aspect ratio is made. This crop
+    is finally resized to the given size.
+    This is popularly used to train the Inception networks.
+    Args:
+        size: expected output size of each edge
+        scale: range of size of the origin size cropped
+        ratio: range of aspect ratio of the origin aspect ratio cropped
+        interpolation: Default: PIL.Image.BILINEAR
+    """
+
+    def __init__(self, size, second_size=None, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.),
+                 interpolation='bilinear', second_interpolation='lanczos'):
+        if isinstance(size, tuple):
+            self.size = size
+        else:
+            self.size = (size, size)
+        if second_size is not None:
+            if isinstance(second_size, tuple):
+                self.second_size = second_size
+            else:
+                self.second_size = (second_size, second_size)
+        else:
+            self.second_size = None
+        if (scale[0] > scale[1]) or (ratio[0] > ratio[1]):
+            warnings.warn("range should be of kind (min, max)")
+
+        self.interpolation = _pil_interp(interpolation)
+        self.second_interpolation = _pil_interp(second_interpolation)
+        self.scale = scale
+        self.ratio = ratio
+
+    @staticmethod
+    def get_params(img, scale, ratio):
+        """Get parameters for ``crop`` for a random sized crop.
+        Args:
+            img (PIL Image): Image to be cropped.
+            scale (tuple): range of size of the origin size cropped
+            ratio (tuple): range of aspect ratio of the origin aspect ratio cropped
+        Returns:
+            tuple: params (i, j, h, w) to be passed to ``crop`` for a random
+                sized crop.
+ """ + area = img.size[0] * img.size[1] + + for attempt in range(10): + target_area = random.uniform(*scale) * area + log_ratio = (math.log(ratio[0]), math.log(ratio[1])) + aspect_ratio = math.exp(random.uniform(*log_ratio)) + + w = int(round(math.sqrt(target_area * aspect_ratio))) + h = int(round(math.sqrt(target_area / aspect_ratio))) + + if w <= img.size[0] and h <= img.size[1]: + i = random.randint(0, img.size[1] - h) + j = random.randint(0, img.size[0] - w) + return i, j, h, w + + # Fallback to central crop + in_ratio = img.size[0] / img.size[1] + if in_ratio < min(ratio): + w = img.size[0] + h = int(round(w / min(ratio))) + elif in_ratio > max(ratio): + h = img.size[1] + w = int(round(h * max(ratio))) + else: # whole image + w = img.size[0] + h = img.size[1] + i = (img.size[1] - h) // 2 + j = (img.size[0] - w) // 2 + return i, j, h, w + + def __call__(self, img, augmentation=False, box=None): + """ + Args: + img (PIL Image): Image to be cropped and resized. + Returns: + PIL Image: Randomly cropped and resized image. + """ + if augmentation: + i, j, h, w = self.get_params(img, self.scale, self.ratio) + img = F.crop(img, i, j, h, w) + # img, box = crop(img, i, j, h, w, box) + img = F.resize(img, self.size, self.interpolation) + second_img = F.resize(img, self.second_size, self.second_interpolation) \ + if self.second_size is not None else None + return img, second_img + + def __repr__(self): + if isinstance(self.interpolation, (tuple, list)): + interpolate_str = ' '.join([_pil_interpolation_to_str[x] for x in self.interpolation]) + else: + interpolate_str = _pil_interpolation_to_str[self.interpolation] + format_string = self.__class__.__name__ + '(size={0}'.format(self.size) + format_string += ', scale={0}'.format(tuple(round(s, 4) for s in self.scale)) + format_string += ', ratio={0}'.format(tuple(round(r, 4) for r in self.ratio)) + format_string += ', interpolation={0}'.format(interpolate_str) + if self.second_size is not None: + format_string += ', second_size={0}'.format(self.second_size) + format_string += ', second_interpolation={0}'.format(_pil_interpolation_to_str[self.second_interpolation]) + format_string += ')' + return format_string + + +def pil_loader(path: str) -> Image.Image: + # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835) + with open(path, 'rb') as f: + img = Image.open(f) + return img.convert('RGB') \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/install_layoutlmft.sh b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/install_layoutlmft.sh deleted file mode 100644 index fd29421017d..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/install_layoutlmft.sh +++ /dev/null @@ -1,3 +0,0 @@ -git clone https://github.com/microsoft/unilm.git -cd unilm/layoutlmv3 -pip install -e . 
\ No newline at end of file
diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/main.py b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/main.py
index a9496b16fbb..b9462cfe067 100644
--- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/main.py
+++ b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/main.py
@@ -10,8 +10,6 @@
 from datasets import ClassLabel, load_dataset, load_metric
 
 import transformers
-
-from layoutlmft.data import DataCollatorForKeyValueExtraction
 from transformers import (
     AutoConfig,
     AutoModelForTokenClassification,
@@ -28,7 +26,7 @@
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
 check_min_version("4.5.0")
 
-from layoutlmft.data.image_utils import RandomResizedCropAndInterpolationWithTwoPic, pil_loader, Compose
+from image_utils import RandomResizedCropAndInterpolationWithTwoPic, pil_loader, Compose
 
 from timm.data.constants import \
     IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD
@@ -267,11 +265,8 @@ def main():
 
     if data_args.dataset_name == 'funsd':
         # datasets = load_dataset("nielsr/funsd")
-        import layoutlmft.data.funsd
-        datasets = load_dataset(os.path.abspath(layoutlmft.data.funsd.__file__), cache_dir=model_args.cache_dir)
-    elif data_args.dataset_name == 'cord':
-        import layoutlmft.data.cord
-        datasets = load_dataset(os.path.abspath(layoutlmft.data.cord.__file__), cache_dir=model_args.cache_dir)
+        import funsd
+        datasets = load_dataset(os.path.abspath(funsd.__file__), cache_dir=model_args.cache_dir)
     else:
         raise NotImplementedError()
 
@@ -279,6 +274,7 @@ def main():
     features = datasets["test"].features
 
     text_column_name = "words" if "words" in column_names else "tokens"
+    boxes_column_name = "bboxes"
     label_column_name = (
         f"{data_args.task_name}_tags" if f"{data_args.task_name}_tags" in column_names else column_names[1]
     )
@@ -366,8 +362,7 @@ def tokenize_and_align_labels(examples, augmentation=False):
         padding=False,
         truncation=True,
         return_overflowing_tokens=True,
-        # We use this argument because the texts in our dataset are lists of words (with a label for each word).
-        is_split_into_words=True,
+        boxes=examples[boxes_column_name],
     )
 
     labels = []
diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/requirements.txt b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/requirements.txt
index 92ae07d8600..0f7947499f6 100644
--- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/requirements.txt
+++ b/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/requirements.txt
@@ -1,10 +1,10 @@
+accelerate
 datasets
-transformers==4.12.5
-seqeval==1.2.2
-tensorboard==2.7.0
-seqeval==1.2.2
+transformers
+seqeval
+tensorboard
 sentencepiece
-timm==0.4.12
+timm
 Pillow
 einops
 textdistance
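For reference, a minimal sketch of how main.py consumes the relocated `funsd.py` loading script and the local `image_utils.py` after this change; the standalone snippet below is illustrative only and is not part of the patch (it assumes the files sit in the same working directory and that your `datasets` version accepts a local loading-script path, as main.py already does).

```python
# Hypothetical standalone check, assuming funsd.py and image_utils.py from this patch
# are importable from the current directory.
import os

from datasets import load_dataset

import funsd  # local dataset loading script that replaces layoutlmft.data.funsd

# Mirrors the load_dataset call in main.py for --dataset_name funsd.
datasets = load_dataset(os.path.abspath(funsd.__file__))

sample = datasets["test"][0]
# Each example carries word-level tokens, segment-level bboxes already normalized
# to the 0-1000 range, BIO ner_tags encoded as ClassLabel ids, and a 3x224x224
# uint8 page image plus its path (produced by load_image in funsd.py).
print(sample["tokens"][:5], sample["bboxes"][:5], sample["ner_tags"][:5])
```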