
Commit f5c4568

quic-mamta authored and mamtsing committed
[QEff Finetune]: Implement logger for finetuning and enable dumping (#371)
1. Implement logger for finetuning.
2. Enable dumping logs via the given flag.

Signed-off-by: Mamta Singh <[email protected]>
Co-authored-by: Mamta Singh <[email protected]>
Signed-off-by: Amit Raj <[email protected]>
1 parent fd93d8c commit f5c4568

File tree: 13 files changed, +221 additions, -154 deletions

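The hunks below lean on logger.log_rank_zero, logger.raise_error, and logger.prepare_for_logs from QEfficient.finetune.utils.logging_utils; that module is one of the 13 changed files but does not appear in this excerpt. The following is a minimal sketch of such a wrapper, assuming it gates messages to rank 0 of a distributed run and optionally dumps them to a file under output_dir. Only the method names and call sites come from the diffs; everything else here is an assumption, not the actual implementation.

import logging
import os

import torch.distributed as dist


class FTLogger:
    """Hypothetical rank-aware logger wrapper; the real logging_utils module is not shown in this commit view."""

    def __init__(self, name: str = "QEfficient-Finetune"):
        self._logger = logging.getLogger(name)
        self._logger.setLevel(logging.INFO)
        self._logger.addHandler(logging.StreamHandler())

    def prepare_for_logs(self, output_dir: str, dump_logs: bool = False, level: int = logging.INFO) -> None:
        # Set verbosity and, when dump_logs is True, also write records to a file under output_dir.
        self._logger.setLevel(level)
        if dump_logs:
            os.makedirs(output_dir, exist_ok=True)
            self._logger.addHandler(logging.FileHandler(os.path.join(output_dir, "finetune.log")))

    def log_rank_zero(self, msg: str, level: int = logging.INFO) -> None:
        # Emit only from rank 0 so multi-device runs do not duplicate every message.
        if dist.is_available() and dist.is_initialized() and dist.get_rank() != 0:
            return
        self._logger.log(level, msg)

    def raise_error(self, msg: str, exception_class: type = RuntimeError) -> None:
        # Log the message once, then raise the requested exception type.
        self.log_rank_zero(msg, logging.ERROR)
        raise exception_class(msg)


logger = FTLogger()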

QEfficient/cloud/finetune.py

Lines changed: 29 additions & 19 deletions
@@ -5,6 +5,7 @@
 #
 # -----------------------------------------------------------------------------
 
+import logging
 import random
 import warnings
 from typing import Any, Dict, Optional, Union
@@ -17,7 +18,7 @@
 import torch.utils.data
 from peft import PeftModel, get_peft_model
 from torch.optim.lr_scheduler import StepLR
-from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoModel, AutoModelForCausalLM, AutoModelForSequenceClassification, AutoTokenizer
 
 from QEfficient.finetune.configs.training import TrainConfig
 from QEfficient.finetune.utils.config_utils import (
@@ -26,18 +27,22 @@
     update_config,
 )
 from QEfficient.finetune.utils.dataset_utils import get_dataloader
+from QEfficient.finetune.utils.logging_utils import logger
 from QEfficient.finetune.utils.parser import get_finetune_parser
-from QEfficient.finetune.utils.train_utils import get_longest_seq_length, print_model_size, train
-from QEfficient.utils._utils import login_and_download_hf_lm
+from QEfficient.finetune.utils.train_utils import (
+    get_longest_seq_length,
+    print_model_size,
+    print_trainable_parameters,
+    train,
+)
+from QEfficient.utils._utils import hf_download
 
 # Try importing QAIC-specific module, proceed without it if unavailable
 try:
     import torch_qaic  # noqa: F401
 except ImportError as e:
-    print(f"Warning: {e}. Proceeding without QAIC modules.")
-
+    logger.log_rank_zero(f"{e}. Moving ahead without these qaic modules.", logging.WARNING)
 
-from transformers import AutoModelForSequenceClassification
 
 # Suppress all warnings
 warnings.filterwarnings("ignore")
@@ -106,7 +111,8 @@ def load_model_and_tokenizer(
     - Resizes model embeddings if tokenizer vocab size exceeds model embedding size.
     - Sets pad_token_id to eos_token_id if not defined in the tokenizer.
     """
-    pretrained_model_path = login_and_download_hf_lm(train_config.model_name)
+    logger.log_rank_zero(f"Loading HuggingFace model for {train_config.model_name}")
+    pretrained_model_path = hf_download(train_config.model_name)
     if train_config.task_type == "seq_classification":
         model = AutoModelForSequenceClassification.from_pretrained(
             pretrained_model_path,
@@ -116,7 +122,7 @@ def load_model_and_tokenizer(
         )
 
         if not hasattr(model, "base_model_prefix"):
-            raise RuntimeError("Given huggingface model does not have 'base_model_prefix' attribute.")
+            logger.raise_error("Given huggingface model does not have 'base_model_prefix' attribute.", RuntimeError)
 
         for param in getattr(model, model.base_model_prefix).parameters():
             param.requires_grad = False
@@ -141,11 +147,10 @@ def load_model_and_tokenizer(
     # If there is a mismatch between tokenizer vocab size and embedding matrix,
    # throw a warning and then expand the embedding matrix
     if len(tokenizer) > model.get_input_embeddings().weight.shape[0]:
-        print("WARNING: Resizing embedding matrix to match tokenizer vocab size.")
+        logger.log_rank_zero("Resizing the embedding matrix to match the tokenizer vocab size.", logging.WARNING)
         model.resize_token_embeddings(len(tokenizer))
 
-    # FIXME (Meet): Cover below line inside the logger once it is implemented.
-    print_model_size(model, train_config)
+    print_model_size(model)
 
     # Note: Need to call this before calling PeftModel.from_pretrained or get_peft_model.
     # Because, both makes model.is_gradient_checkpointing = True which is used in peft library to
@@ -157,7 +162,9 @@ def load_model_and_tokenizer(
         if hasattr(model, "supports_gradient_checkpointing") and model.supports_gradient_checkpointing:
             model.gradient_checkpointing_enable(gradient_checkpointing_kwargs={"preserve_rng_state": False})
         else:
-            raise RuntimeError("Given model doesn't support gradient checkpointing. Please disable it and run it.")
+            logger.raise_error(
+                "Given model doesn't support gradient checkpointing. Please disable it and run it.", RuntimeError
+            )
 
     model = apply_peft(model, train_config, peft_config_file, **kwargs)
 
@@ -192,7 +199,7 @@ def apply_peft(
     else:
         peft_config = generate_peft_config(train_config, peft_config_file, **kwargs)
         model = get_peft_model(model, peft_config)
-        model.print_trainable_parameters()
+        print_trainable_parameters(model)
 
     return model
 
@@ -217,25 +224,26 @@ def setup_dataloaders(
     - Length of longest sequence in the dataset.
 
     Raises:
-        ValueError: If validation is enabled but the validation set is too small.
+        RuntimeError: If validation is enabled but the validation set is too small.
 
     Notes:
     - Applies a custom data collator if provided by get_custom_data_collator.
    - Configures DataLoader kwargs using get_dataloader_kwargs for train and val splits.
     """
 
     train_dataloader = get_dataloader(tokenizer, dataset_config, train_config, split="train")
-    print(f"--> Num of Training Set Batches loaded = {len(train_dataloader)}")
+    logger.log_rank_zero(f"Number of Training Set Batches loaded = {len(train_dataloader)}")
 
     eval_dataloader = None
     if train_config.run_validation:
         eval_dataloader = get_dataloader(tokenizer, dataset_config, train_config, split="val")
         if len(eval_dataloader) == 0:
-            raise ValueError(
-                f"The eval set size is too small for dataloader to load even one batch. Please increase the size of eval set. ({len(eval_dataloader)=})"
+            logger.raise_error(
+                f"The eval set size is too small for dataloader to load even one batch. Please increase the size of eval set. ({len(eval_dataloader)=})",
+                ValueError,
             )
         else:
-            print(f"--> Num of Validation Set Batches loaded = {len(eval_dataloader)}")
+            logger.log_rank_zero(f"Number of Validation Set Batches loaded = {len(eval_dataloader)}")
 
         longest_seq_length, _ = get_longest_seq_length(
             torch.utils.data.ConcatDataset([train_dataloader.dataset, eval_dataloader.dataset])
@@ -274,13 +282,15 @@ def main(peft_config_file: str = None, **kwargs) -> None:
     dataset_config = generate_dataset_config(train_config.dataset)
     update_config(dataset_config, **kwargs)
 
+    logger.prepare_for_logs(train_config.output_dir, train_config.dump_logs, train_config.log_level)
+
     setup_distributed_training(train_config)
     setup_seeds(train_config.seed)
     model, tokenizer = load_model_and_tokenizer(train_config, dataset_config, peft_config_file, **kwargs)
 
     # Create DataLoaders for the training and validation dataset
     train_dataloader, eval_dataloader, longest_seq_length = setup_dataloaders(train_config, dataset_config, tokenizer)
-    print(
+    logger.log_rank_zero(
         f"The longest sequence length in the train data is {longest_seq_length}, "
         f"passed context length is {train_config.context_length} and overall model's context length is "
        f"{model.config.max_position_embeddings}"

QEfficient/finetune/configs/training.py

Lines changed: 4 additions & 1 deletion
@@ -5,6 +5,7 @@
 #
 # -----------------------------------------------------------------------------
 
+import logging
 from dataclasses import dataclass
 
 
@@ -94,5 +95,7 @@ class TrainConfig:
     use_profiler: bool = False  # Enable pytorch profiler, can not be used with flop counter at the same time.
     # profiler_dir: str = "PATH/to/save/profiler/results" # will be used if using profiler
 
-    dump_root_dir: str = "mismatches/step_"
     opByOpVerifier: bool = False
+
+    dump_logs: bool = True
+    log_level: str = logging.INFO
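Since TrainConfig is a dataclass, the two new fields can also be set directly when building a config in code. A brief sketch; the constructor-style usage is illustrative, as the finetune entry point normally populates these through its parser and update_config.

import logging

from QEfficient.finetune.configs.training import TrainConfig

# dump_logs defaults to True; log_level defaults to logging.INFO (the numeric constant,
# even though the field is annotated as str).
cfg = TrainConfig(dump_logs=False, log_level=logging.WARNING)
print(cfg.dump_logs, cfg.log_level)  # False 30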

QEfficient/finetune/dataset/alpaca_dataset.py

Lines changed: 9 additions & 1 deletion
@@ -11,6 +11,8 @@
 import torch
 from torch.utils.data import Dataset
 
+from QEfficient.finetune.utils.logging_utils import logger
+
 PROMPT_DICT = {
     "prompt_input": (
         "Below is an instruction that describes a task, paired with an input that provides further context. "
@@ -27,7 +29,13 @@
 
 class InstructionDataset(Dataset):
     def __init__(self, dataset_config, tokenizer, partition="train", context_length=None):
-        self.ann = json.load(open(dataset_config.data_path))
+        try:
+            self.ann = json.load(open(dataset_config.data_path))
+        except FileNotFoundError:
+            logger.raise_error(
+                "Loading of alpaca dataset failed! Please use (wget -c https://raw.githubusercontent.com/tatsu-lab/stanford_alpaca/refs/heads/main/alpaca_data.json -P dataset/) to download the alpaca dataset.",
+                FileNotFoundError,
+            )
         # Use 5% of the dataset for evaluation
         eval_length = int(len(self.ann) / 20)
         if partition == "train":

QEfficient/finetune/dataset/custom_dataset.py

Lines changed: 18 additions & 10 deletions
@@ -8,6 +8,8 @@
 import importlib
 from pathlib import Path
 
+from QEfficient.finetune.utils.logging_utils import logger
+
 
 def load_module_from_py_file(py_file: str) -> object:
     """
@@ -30,20 +32,22 @@ def get_custom_dataset(dataset_config, tokenizer, split: str, context_length=None):
     module_path, func_name = dataset_config.file, "get_custom_dataset"
 
     if not module_path.endswith(".py"):
-        raise ValueError(f"Dataset file {module_path} is not a .py file.")
+        logger.raise_error(f"Dataset file {module_path} is not a .py file.", ValueError)
 
     module_path = Path(module_path)
     if not module_path.is_file():
-        raise FileNotFoundError(f"Dataset py file {module_path.as_posix()} does not exist or is not a file.")
+        logger.raise_error(
+            f"Dataset py file {module_path.as_posix()} does not exist or is not a file.", FileNotFoundError
+        )
 
     module = load_module_from_py_file(module_path.as_posix())
     try:
         return getattr(module, func_name)(dataset_config, tokenizer, split, context_length)
-    except AttributeError as e:
-        print(
-            f"It seems like the given method name ({func_name}) is not present in the dataset .py file ({module_path.as_posix()})."
+    except AttributeError:
+        logger.raise_error(
+            f"It seems like the given method name ({func_name}) is not present in the dataset .py file ({module_path.as_posix()}).",
+            AttributeError,
         )
-        raise e
 
 
 def get_data_collator(dataset_processer, dataset_config):
@@ -53,16 +57,20 @@ def get_data_collator(dataset_processer, dataset_config):
     module_path, func_name = dataset_config.file, "get_data_collator"
 
     if not module_path.endswith(".py"):
-        raise ValueError(f"Dataset file {module_path} is not a .py file.")
+        logger.raise_error(f"Dataset file {module_path} is not a .py file.", ValueError)
 
     module_path = Path(module_path)
     if not module_path.is_file():
-        raise FileNotFoundError(f"Dataset py file {module_path.as_posix()} does not exist or is not a file.")
+        logger.raise_error(
+            f"Dataset py file {module_path.as_posix()} does not exist or is not a file.", FileNotFoundError
+        )
 
     module = load_module_from_py_file(module_path.as_posix())
     try:
         return getattr(module, func_name)(dataset_processer)
     except AttributeError:
-        print(f"Can not find the custom data_collator in the dataset.py file ({module_path.as_posix()}).")
-        print("Using the default data_collator instead.")
+        logger.log_rank_zero(
+            f"Can not find the custom data_collator in the dataset.py file ({module_path.as_posix()})."
+        )
+        logger.log_rank_zero("Using the default data_collator instead.")
         return None
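For reference, the loader above expects the user-supplied dataset .py file to define get_custom_dataset (and optionally get_data_collator) with exactly the call signatures used in the getattr(...) lines. A minimal sketch of such a file follows; the file name and the toy data are placeholders, not part of this commit.

# my_custom_dataset.py -- hypothetical file passed as dataset_config.file
from torch.utils.data import Dataset


class _ToyDataset(Dataset):
    def __init__(self, tokenizer, split, context_length):
        self.samples = ["hello world", "quic efficient finetuning"]  # placeholder data
        self.tokenizer = tokenizer
        self.context_length = context_length

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        return self.tokenizer(self.samples[idx], max_length=self.context_length, truncation=True)


def get_custom_dataset(dataset_config, tokenizer, split, context_length=None):
    # Called as getattr(module, "get_custom_dataset")(dataset_config, tokenizer, split, context_length)
    return _ToyDataset(tokenizer, split, context_length)


def get_data_collator(dataset_processer):
    # Called as getattr(module, "get_data_collator")(dataset_processer). Omitting this
    # function makes the loader above log a message and fall back to the default collator.
    return None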

QEfficient/finetune/dataset/grammar_dataset.py

Lines changed: 8 additions & 9 deletions
@@ -10,6 +10,8 @@
 from datasets import load_dataset
 from torch.utils.data import Dataset
 
+from QEfficient.finetune.utils.logging_utils import logger
+
 
 class grammar(Dataset):
     def __init__(self, tokenizer, csv_name=None, context_length=None):
@@ -19,11 +21,11 @@ def __init__(self, tokenizer, csv_name=None, context_length=None):
                 data_files={"train": [csv_name]},  # "eval": "grammar_validation.csv"},
                 delimiter=",",
             )
-        except Exception as e:
-            print(
-                "Loading of grammar dataset failed! Please see [here](https://github.com/meta-llama/llama-recipes/blob/main/src/llama_recipes/datasets/grammar_dataset/grammar_dataset_process.ipynb) for details on how to download the dataset."
+        except FileNotFoundError:
+            logger.raise_error(
+                "Loading of grammar dataset failed! Please check (https://github.com/meta-llama/llama-recipes/blob/main/src/llama_recipes/datasets/grammar_dataset/grammar_dataset_process.ipynb) for details on how to download the dataset.",
+                FileNotFoundError,
            )
-            raise e
 
         self.context_length = context_length
         self.tokenizer = tokenizer
@@ -36,7 +38,7 @@ def convert_to_features(self, example_batch):
         # Create prompt and tokenize contexts and questions
 
         if self.print_text:
-            print("Input Text: ", self.clean_text(example_batch["text"]))
+            logger.log_rank_zero("Input Text: ", self.clean_text(example_batch["text"]))
 
         input_ = example_batch["input"]
         target_ = example_batch["target"]
@@ -71,9 +73,6 @@ def get_dataset(dataset_config, tokenizer, csv_name=None, context_length=None):
     """cover function for handling loading the working dataset"""
     """dataset loading"""
     currPath = Path.cwd() / "datasets_grammar" / "grammar_train.csv"
-    print(f"Loading dataset {currPath}")
-    csv_name = str(currPath)
-    print(csv_name)
-    dataset = grammar(tokenizer=tokenizer, csv_name=csv_name, context_length=context_length)
+    dataset = grammar(tokenizer=tokenizer, csv_name=str(currPath), context_length=context_length)
 
     return dataset

QEfficient/finetune/eval.py

Lines changed: 8 additions & 12 deletions
@@ -19,13 +19,14 @@
 from utils.train_utils import evaluation, print_model_size
 
 from QEfficient.finetune.configs.training import TrainConfig
+from QEfficient.finetune.utils.logging_utils import logger
 
 try:
     import torch_qaic  # noqa: F401
 
     device = "qaic:0"
 except ImportError as e:
-    print(f"Warning: {e}. Moving ahead without these qaic modules.")
+    logger.log_rank_zero(f"{e}. Moving ahead without these qaic modules.")
     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
 # Suppress all warnings
@@ -77,25 +78,20 @@ def main(**kwargs):
     # If there is a mismatch between tokenizer vocab size and embedding matrix,
     # throw a warning and then expand the embedding matrix
     if len(tokenizer) > model.get_input_embeddings().weight.shape[0]:
-        print("WARNING: Resizing the embedding matrix to match the tokenizer vocab size.")
+        logger.log_rank_zero("Resizing the embedding matrix to match the tokenizer vocab size.")
         model.resize_token_embeddings(len(tokenizer))
 
-    print_model_size(model, train_config)
+    print_model_size(model)
 
     if train_config.run_validation:
-        # TODO: vbaddi enable packing later in entire infra.
-        # if train_config.batching_strategy == "packing":
-        #     dataset_val = ConcatDataset(dataset_val, chunk_size=train_config.context_length)
-
         eval_dataloader = get_dataloader(tokenizer, dataset_config, train_config, split="test")
-
-        print(f"--> Num of Validation Set Batches loaded = {len(eval_dataloader)}")
         if len(eval_dataloader) == 0:
-            raise ValueError(
-                f"The eval set size is too small for dataloader to load even one batch. Please increase the size of eval set. ({len(eval_dataloader)=})"
+            logger.raise_error(
+                f"The eval set size is too small for dataloader to load even one batch. Please increase the size of eval set. ({len(eval_dataloader)=})",
+                ValueError,
             )
         else:
-            print(f"--> Num of Validation Set Batches loaded = {len(eval_dataloader)}")
+            logger.log_rank_zero(f"Number of Validation Set Batches loaded = {len(eval_dataloader)}")
 
     model.to(device)
     _ = evaluation(model, train_config, eval_dataloader, None, tokenizer, device)
