diff --git a/src/llmcompressor/__init__.py b/src/llmcompressor/__init__.py
index 264d434f0..3f9f14ac3 100644
--- a/src/llmcompressor/__init__.py
+++ b/src/llmcompressor/__init__.py
@@ -36,7 +36,6 @@
 
 from llmcompressor.core.session_functions import (
     active_session,
-    apply,
     callbacks,
     create_session,
     finalize,
diff --git a/src/llmcompressor/core/__init__.py b/src/llmcompressor/core/__init__.py
index 171c95395..75335164d 100644
--- a/src/llmcompressor/core/__init__.py
+++ b/src/llmcompressor/core/__init__.py
@@ -11,7 +11,6 @@
 from llmcompressor.core.session_functions import (
     LifecycleCallbacks,
     active_session,
-    apply,
     callbacks,
     create_session,
     finalize,
diff --git a/src/llmcompressor/core/lifecycle.py b/src/llmcompressor/core/lifecycle.py
index 232d76b83..30654cf8c 100644
--- a/src/llmcompressor/core/lifecycle.py
+++ b/src/llmcompressor/core/lifecycle.py
@@ -20,7 +20,9 @@
 from llmcompressor.modifiers import StageModifiers
 from llmcompressor.recipe import RecipeContainer
 
-__all__ = ["CompressionLifecycle"]
+__all__ = [
+    "CompressionLifecycle",
+]
 
 
 @dataclass
diff --git a/src/llmcompressor/core/session.py b/src/llmcompressor/core/session.py
index 7c489f36f..888db3f1e 100644
--- a/src/llmcompressor/core/session.py
+++ b/src/llmcompressor/core/session.py
@@ -200,19 +200,6 @@ def finalize(self, **kwargs) -> ModifiedState:
             modifier_data=mod_data,
         )
 
-    def apply(self, **kwargs):
-        """
-        Apply the recipe in one-shot manner. This will invoke the initialize
-        and then finalize methods for each modifier in the session's lifecycle.
-        This will also set the session's state to the finalized state.
-
-        :param kwargs: additional kwargs to pass to the lifecycle's initialize and
-            finalize methods
-        """
-        self.initialize(**kwargs)
-
-        return self.finalize(**kwargs)
-
     def event(
         self,
         event_type: EventType,
diff --git a/src/llmcompressor/core/session_functions.py b/src/llmcompressor/core/session_functions.py
index 9a123a030..da54872c4 100644
--- a/src/llmcompressor/core/session_functions.py
+++ b/src/llmcompressor/core/session_functions.py
@@ -14,7 +14,6 @@
     "pre_initialize_structure",
     "initialize",
     "finalize",
-    "apply",
     "callbacks",
     "LifecycleCallbacks",
 ]
@@ -143,62 +142,6 @@ def finalize(**kwargs) -> ModifiedState:
     return active_session().finalize(**kwargs)
 
 
-def apply(
-    recipe: Union[str, List[str], "Recipe", List["Recipe"], None] = None,
-    recipe_stage: Union[str, List[str], None] = None,
-    recipe_args: Optional[Dict[str, Any]] = None,
-    model: Optional[Any] = None,
-    teacher_model: Optional[Any] = None,
-    train_data: Optional[Any] = None,
-    val_data: Optional[Any] = None,
-    test_data: Optional[Any] = None,
-    calib_data: Optional[Any] = None,
-    copy_data: bool = True,
-    start: Optional[float] = None,
-    steps_per_epoch: Optional[int] = None,
-    batches_per_step: Optional[int] = None,
-    **kwargs,
-) -> ModifiedState:
-    """
-    A method to apply the recipe in one-shot manner. This will invoke the initialize
-    and then finalize methods for each modifier in the active session's lifecycle.
-
-    :param recipe: the recipe to use for the sparsification, can be a path to a
-        recipe file, a raw recipe string, a recipe object, or a list of recipe objects.
-    :param recipe_stage: the stage to target for the sparsification
-    :param recipe_args: the args to use for overriding the recipe defaults
-    :param model: the model to sparsify
-    :param teacher_model: the teacher model to use for knowledge distillation
-    :param train_data: the training data to use for the sparsification
-    :param val_data: the validation data to use for the sparsification
-    :param test_data: the testing data to use for the sparsification
-    :param calib_data: the calibration data to use for the sparsification
-    :param copy_data: True to copy the data, False otherwise
-    :param start: the start epoch to use for the sparsification
-    :param steps_per_epoch: the number of steps per epoch to use for the
-        sparsification
-    :param batches_per_step: the number of batches per step to use for
-    :param kwargs: additional kwargs to pass to the current session's apply method
-    :return: the modified state of the active session after applying the recipe
-    """
-    return active_session().apply(
-        recipe=recipe,
-        recipe_stage=recipe_stage,
-        recipe_args=recipe_args,
-        model=model,
-        teacher_model=teacher_model,
-        train_data=train_data,
-        val_data=val_data,
-        test_data=test_data,
-        calib_data=calib_data,
-        copy_data=copy_data,
-        start=start,
-        steps_per_epoch=steps_per_epoch,
-        batches_per_step=batches_per_step,
-        **kwargs,
-    )
-
-
 class LifecycleCallbacks:
     """
     A class for invoking lifecycle events for the active session
diff --git a/src/llmcompressor/transformers/calibration/__init__.py b/src/llmcompressor/transformers/calibration/__init__.py
new file mode 100644
index 000000000..65fc2575f
--- /dev/null
+++ b/src/llmcompressor/transformers/calibration/__init__.py
@@ -0,0 +1,3 @@
+# flake8: noqa
+
+from .oneshot import Oneshot
diff --git a/src/llmcompressor/transformers/calibration/oneshot.py b/src/llmcompressor/transformers/calibration/oneshot.py
new file mode 100644
index 000000000..4601a02b1
--- /dev/null
+++ b/src/llmcompressor/transformers/calibration/oneshot.py
@@ -0,0 +1,263 @@
+from pathlib import PosixPath
+from typing import Optional
+
+from loguru import logger
+from torch.utils.data import DataLoader
+
+from llmcompressor.core.session_functions import active_session
+from llmcompressor.transformers.finetune.data.data_helpers import (
+    get_calibration_dataloader,
+)
+from llmcompressor.transformers.finetune.text_generation import (
+    initialize_model_from_path,
+    initialize_processor_from_path,
+    parse_args,
+)
+from llmcompressor.transformers.sparsification.compressed_tensors_utils import (
+    modify_save_pretrained,
+    patch_tied_tensors_bug,
+)
+from llmcompressor.transformers.utils.arg_parser import DEFAULT_OUTPUT_DIR
+
+__all__ = ["Oneshot"]
+
+
+class Oneshot:
+    """
+    Class responsible for carrying out one-shot calibration on a pretrained model.
+
+    This class handles the entire lifecycle of one-shot calibration, including
+    preprocessing (model and tokenizer/processor initialization), model optimization
+    (quantization or sparsification), and postprocessing (saving outputs). The
+    intructions for model optimization can be specified by using a recipe (fine-grain
+    details) or by using a scheme (ex. W4A16, W8A8, W4A8).
+
+    - **Input Keyword Arguments:**
+        `kwargs` are parsed into:
+        - `model_args`: Arguments for loading and configuring a pretrained model
+          (e.g., `AutoModelForCausalLM`).
+        - `data_args`: Arguments for dataset-related configurations, such as
+          calibration dataloaders.
+        - `recipe_args`: Arguments for defining and configuring recipes that specify
+          optimization actions.
+
+        Parsers are defined in `src/llmcompressor/transformers/utils/arg_parser`.
+
+    - **Lifecycle Overview:**
+        The calibration lifecycle consists of three steps:
+        1. **Preprocessing**:
+            - Instantiates a pretrained model and tokenizer/processor.
+            - Ensures input and output embedding layers are untied if they share
+              tensors.
+            - Patches the model to include additional functionality for saving with
+              quantization configurations.
+        2. **Oneshot Calibration**:
+            - Optimizes the model using a global `CompressionSession` and applies
+              recipe-defined modifiers (e.g., `GPTQModifier`, `SparseGPTModifier`)
+        3. **Postprocessing**:
+            - Saves the model, tokenizer/processor, and configuration to the specified
+              `output_dir`.
+
+    - **Usage:**
+        ```python
+        oneshot = Oneshot(model=model, recipe=recipe, dataset=dataset)
+        oneshot.run()
+
+        # Access the processed components
+        model = oneshot.model
+        tokenizer_or_processor = oneshot.tokenizer_or_processor
+        recipe = oneshot.recipe
+        ```
+
+    Methods:
+        __init__(**kwargs):
+            Initializes the `Oneshot` object by parsing input arguments, performing
+            preprocessing, and setting instance attributes.
+
+        run(**kwargs):
+            Performs the one-shot calibration process by preparing a calibration
+            dataloader, applying recipe modifiers to the model, and executing
+            postprocessing steps.
+
+        save():
+            Saves the calibrated model and tokenizer/processor to the specified
+            `output_dir`. Supports saving in compressed formats based on model
+            arguments.
+
+        _apply_recipe_modifiers(calibration_dataloader, **kwargs):
+            Applies lifecycle actions (e.g., `initialize`, `finalize`) using modifiers
+            defined in the recipe. Each action is executed via the global
+            `CompressionSession`.
+
+        _pre_process():
+            Handles preprocessing steps, including model initialization,
+            tokenizer/processor setup, and resolving tied embedding issues.
+
+        _warn_tied_embeddings():
+            Logs a warning if `tie_word_embeddings=True`, which may interfere with
+            saving in the one-shot workflow.
+
+        _post_process():
+            Executes postprocessing steps such as saving the model and resetting
+            lifecycle actions, especially when a custom `output_dir` is specified.
+    """
+
+    MODIFIER_LIFECYCLE_ACTIONS = (
+        "initialize",
+        "finalize",
+    )
+
+    def __init__(self, **kwargs):
+        """
+        Initializes the `Oneshot` class with provided arguments.
+
+        Parses the input keyword arguments into `model_args`, `data_args`, and
+        `recipe_args`. Performs preprocessing to initialize the model and
+        tokenizer/processor.
+
+        Args:
+            kwargs: Arbitrary keyword arguments for model, data, and recipe
+            configurations.
+        """
+        self.model_args, self.data_args, self.recipe_args, _, self.output_dir = (
+            parse_args(**kwargs)
+        )
+
+        # Preprocess the model and tokenizer/processor
+        self._pre_process()
+
+        # Set instance attributes
+        self.model = self.model_args.model
+        self.tokenizer_or_processor = self.model_args.processor
+        self.recipe = self.recipe_args.recipe
+
+    def run(self, **kwargs):
+        """
+        Performs one-shot calibration.
+
+        This method prepares a calibration dataloader using dataset arguments and
+        applies recipe-based modifiers to optimize the model. The lifecycle actions
+        are executed sequentially, and the modified model is saved during
+        postprocessing.
+
+        Args:
+            kwargs: Additional keyword arguments for the recipe modifiers.
+        """
+        calibration_dataloader = get_calibration_dataloader(
+            self.data_args, self.tokenizer_or_processor
+        )
+        self._apply_recipe_modifiers(
+            calibration_dataloader=calibration_dataloader, **kwargs
+        )
+        self._post_process()
+
+    def save(self):
+        """
+        Saves the model and tokenizer/processor to the output directory.
+
+        The model is saved in a compressed format if specified in `model_args`.
+        The tokenizer or processor, if available, is also saved.
+
+        Raises:
+            ValueError: If saving fails due to an invalid `output_dir` or other issues.
+        """
+        self.model.save_pretrained(
+            self.output_dir,
+            save_compressed=self.model_args.save_compressed,
+        )
+        if self.tokenizer_or_processor:
+            self.tokenizer_or_processor.save_pretrained(self.output_dir)
+
+    def _apply_recipe_modifiers(
+        self, calibration_dataloader: Optional[DataLoader], **kwargs
+    ):
+        """
+        Applies recipe modifiers to the model during the lifecycle.
+
+        The modifiers are defined in the recipe and executed via lifecycle actions
+        (`initialize`, `finalize`) through the global `CompressionSession`.
+
+        Args:
+            calibration_dataloader (Optional[DataLoader]): Dataloader for calibration
+            data.
+            kwargs: Additional arguments for lifecycle actions.
+
+        Raises:
+            RuntimeError: If any modifier fails during execution.
+        """
+        for action in self.MODIFIER_LIFECYCLE_ACTIONS:
+            session = active_session()
+            session_action = getattr(session, action)
+            session_action(
+                model=self.model,
+                recipe=self.recipe,
+                recipe_args=self.recipe_args.recipe_args,
+                calib_data=calibration_dataloader,
+                start=-1,  # oneshot-specific argument
+                copy_data=False,
+                min_tokens_per_module=getattr(self, "min_tokens_per_module", None),
+                **kwargs,
+            )
+
+    def _pre_process(self):
+        """
+        Prepares the model and tokenizer/processor for calibration.
+
+        - Initializes the model if it's specified as a path or string.
+        - Applies patches to fix tied tensor issues and modifies `save_pretrained`
+          behavior.
+        - Initializes the processor if specified as a path or `None`.
+        - Sets the minimum tokens per module if `data_args` are provided.
+
+        Raises:
+            FileNotFoundError: If the model or processor path is invalid.
+        """
+        self._warn_tied_embeddings()
+
+        # Initialize model
+        if isinstance(self.model_args.model, (str, PosixPath)):
+            self.model_args.model, _ = initialize_model_from_path(self.model_args)
+
+        patch_tied_tensors_bug(self.model_args.model)
+        modify_save_pretrained(self.model_args.model)
+
+        # Initialize processor
+        if isinstance(self.model_args.processor, (str, type(None))):
+            self.model_args.processor = initialize_processor_from_path(
+                self.model_args, self.model_args.model
+            )
+
+        # Set minimum tokens per module if data arguments are provided
+        if self.data_args:
+            self.min_tokens_per_module = self.data_args.min_tokens_per_module
+
+    def _warn_tied_embeddings(self):
+        """
+        Logs a warning if the model has tied word embeddings.
+
+        The `tie_word_embeddings` flag may cause issues during saving in the one-shot
+        calibration workflow due to shared tensor addresses.
+        """
+        if self.model_args.tie_word_embeddings:
+            logger.debug(
+                "The tie_word_embeddings flag is by default set to False. "
+                "This guarantees that the one-shot algorithm saves the final "
+                "weights without errors. Detected tie_word_embeddings=True. "
+                "This may cause issues with the one-shot algorithm on save."
+            )
+
+    def _post_process(self):
+        """
+        Executes post-calibration steps.
+
+        This method saves the model and resets lifecycle actions if the `output_dir`
+        is not the default directory.
+
+        Raises:
+            ValueError: If saving fails due to invalid configurations.
+        """
+        if (
+            isinstance(self.model_args.model, str)
+            or self.output_dir != DEFAULT_OUTPUT_DIR
+        ):
+            self.save()
diff --git a/src/llmcompressor/transformers/compression/sparsity_config.py b/src/llmcompressor/transformers/compression/sparsity_config.py
index 1183023b3..6d3155922 100644
--- a/src/llmcompressor/transformers/compression/sparsity_config.py
+++ b/src/llmcompressor/transformers/compression/sparsity_config.py
@@ -12,7 +12,7 @@
 from torch import Tensor
 from torch.nn import Module
 
-from llmcompressor.core import active_session
+from llmcompressor.core import CompressionLifecycle, active_session
 from llmcompressor.pytorch.utils import ModuleSparsificationInfo
 from llmcompressor.transformers.compression.helpers import (
     infer_sparse_targets_and_ignores,
@@ -47,7 +47,10 @@ def infer_global_sparsity(
         return global_sparsity
 
     @staticmethod
-    def infer_sparsity_structure(model: Optional[Module] = None) -> str:
+    def infer_sparsity_structure(
+        model: Optional[Module] = None,
+        stage_modifiers: Optional[CompressionLifecycle] = None,
+    ) -> str:
         """
         Determines what sparsity structure, if any, was applied.
 
@@ -107,7 +110,7 @@ def from_pretrained(
             return None
 
         sparsity_structure = SparsityConfigMetadata.infer_sparsity_structure(
-            model=model
+            model=model,
         )
         if (
             disable_sparse_compression
diff --git a/src/llmcompressor/transformers/finetune/__init__.py b/src/llmcompressor/transformers/finetune/__init__.py
index aad70ae2c..6c75b902b 100644
--- a/src/llmcompressor/transformers/finetune/__init__.py
+++ b/src/llmcompressor/transformers/finetune/__init__.py
@@ -1,7 +1,5 @@
 # flake8: noqa
 
-from .data import DataTrainingArguments, TextGenerationDataset
-from .model_args import ModelArguments
+from .data import TextGenerationDataset
 from .session_mixin import SessionManagerMixIn
 from .text_generation import apply, compress, eval, oneshot, train
-from .training_args import TrainingArguments
diff --git a/src/llmcompressor/transformers/finetune/data/__init__.py b/src/llmcompressor/transformers/finetune/data/__init__.py
index ddf0b2364..a53caed1b 100644
--- a/src/llmcompressor/transformers/finetune/data/__init__.py
+++ b/src/llmcompressor/transformers/finetune/data/__init__.py
@@ -4,7 +4,6 @@
 from .c4 import C4Dataset
 from .cnn_dailymail import CNNDailyMailDataset
 from .custom import CustomDataset
-from .data_args import DataTrainingArguments
 from .evolcodealpaca import EvolCodeAlpacaDataset
 from .flickr_30k import Flickr30K
 from .gsm8k import GSM8KDataset
diff --git a/src/llmcompressor/transformers/finetune/data/base.py b/src/llmcompressor/transformers/finetune/data/base.py
index 81a3fc95f..ec754fe4f 100644
--- a/src/llmcompressor/transformers/finetune/data/base.py
+++ b/src/llmcompressor/transformers/finetune/data/base.py
@@ -8,12 +8,12 @@
 from datasets.formatting.formatting import LazyRow
 from loguru import logger
 
-from llmcompressor.transformers.finetune.data.data_args import DataTrainingArguments
 from llmcompressor.transformers.finetune.data.data_helpers import (
     LABELS_MASK_VALUE,
     get_custom_datasets_from_path,
     get_raw_dataset,
 )
+from llmcompressor.transformers.utils.arg_parser import DatasetArguments
 from llmcompressor.transformers.utils.preprocessing_functions import (
     PreprocessingFunctionRegistry,
 )
@@ -41,7 +41,7 @@ class TextGenerationDataset(RegistryMixin):
 
     def __init__(
         self,
-        data_args: DataTrainingArguments,
+        data_args: DatasetArguments,
         split: str,
         processor: Processor,
     ):
diff --git a/src/llmcompressor/transformers/finetune/data/data_helpers.py b/src/llmcompressor/transformers/finetune/data/data_helpers.py
index 23c70e561..6020cd17d 100644
--- a/src/llmcompressor/transformers/finetune/data/data_helpers.py
+++ b/src/llmcompressor/transformers/finetune/data/data_helpers.py
@@ -1,9 +1,11 @@
 import logging
 import os
+import re
 from typing import Any, Callable, Dict, List, Optional
 
 import torch
 from datasets import Dataset, load_dataset
+from loguru import logger
 from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
 from transformers.data import default_data_collator
 
@@ -15,6 +17,7 @@
     "get_raw_dataset",
     "make_dataset_splits",
     "get_custom_datasets_from_path",
+    "get_calibration_dataloader",
 ]
 
 
@@ -243,3 +246,76 @@ def do_transform(candidate: str) -> bool:
             transform_dataset_key(dataset_key)
 
     return data_files
+
+
+def get_calibration_dataloader(
+    data_args,
+    processor,
+    add_labels: bool = False,  # for oneshot
+    do_oneshot=True,
+):
+    """
+    Loads datasets for each flow based on data_args, stores a Dataset for each
+    enabled flow in self.datasets
+
+    :param processor: processor or tokenizer to use for dataset tokenization
+    :param add_labels: if True, add labels column to dataset splits
+    """
+    if data_args.dataset is None:
+        logger.info(
+            "Running oneshot without calibration data. This is expected for "
+            "weight-only and dynamic quantization"
+        )
+        return
+
+    splits = data_args.splits
+    tokenized_datasets = {}
+
+    def _get_split_name(inp_str):
+        # strip out split name, for ex train[60%:] -> train
+        match = re.match(r"(\w*)\[.*\]", inp_str)
+        if match is not None:
+            return match.group(1)
+        return inp_str
+
+    if splits is None:
+        splits = {"all": None}
+    elif isinstance(splits, str):
+        splits = {_get_split_name(splits): splits}
+    elif isinstance(splits, List):
+        splits = {_get_split_name(s): s for s in splits}
+
+    # default to custom dataset if dataset provided isn't a string
+    registry_id = data_args.dataset if isinstance(data_args.dataset, str) else "custom"
+    for split_name, split_str in splits.items():
+        dataset = data_args.dataset
+        if hasattr(dataset, "column_names") and "input_ids" in dataset.column_names:
+            # dataset is already tokenized
+            tokenized_datasets[split_name] = dataset
+        else:
+            # dataset needs to be tokenized
+            from llmcompressor.transformers.finetune.data.base import (
+                TextGenerationDataset,
+            )
+
+            dataset_manager = TextGenerationDataset.load_from_registry(
+                registry_id,
+                data_args=data_args,
+                split=split_str,
+                processor=processor,
+            )
+            tokenized_datasets[split_name] = dataset_manager(add_labels=add_labels)
+
+    datasets = make_dataset_splits(
+        tokenized_datasets,
+        do_oneshot=do_oneshot,
+    )
+
+    calibration_dataset = datasets.get("calibration")
+
+    return format_calibration_data(
+        tokenized_dataset=calibration_dataset,
+        num_calibration_samples=data_args.num_calibration_samples,
+        do_shuffle=data_args.shuffle_calibration_samples,
+        collate_fn=data_args.data_collator,
+    )
diff --git a/src/llmcompressor/transformers/finetune/runner.py b/src/llmcompressor/transformers/finetune/runner.py
index 0a07c45eb..c1aec5164 100644
--- a/src/llmcompressor/transformers/finetune/runner.py
+++ b/src/llmcompressor/transformers/finetune/runner.py
@@ -16,13 +16,20 @@
 from llmcompressor.pytorch.utils import tensors_to_device
 from llmcompressor.recipe import Recipe, StageRunType
 from llmcompressor.transformers.finetune.data import TextGenerationDataset
-from llmcompressor.transformers.finetune.data.data_args import DataTrainingArguments
 from llmcompressor.transformers.finetune.data.data_helpers import (
     format_calibration_data,
     make_dataset_splits,
 )
-from llmcompressor.transformers.finetune.model_args import ModelArguments
-from llmcompressor.transformers.finetune.training_args import TrainingArguments
+from llmcompressor.transformers.utils.arg_parser import (
+    DatasetArguments,
+    ModelArguments,
+    RecipeArguments,
+    TrainingArguments,
+)
+from llmcompressor.transformers.utils.arg_parser.training_arguments import (
+    DEFAULT_OUTPUT_DIR,
+)
+from llmcompressor.transformers.utils.arg_parser.utils import get_dataclass_as_dict
 from llmcompressor.typing import Processor
 from llmcompressor.utils.fsdp.helpers import is_fsdp_model, save_model_and_recipe
 
@@ -46,13 +53,15 @@ class StageRunner:
 
     def __init__(
         self,
-        data_args: "DataTrainingArguments",
+        data_args: "DatasetArguments",
         model_args: "ModelArguments",
         training_args: "TrainingArguments",
+        recipe_args: "RecipeArguments",
     ):
         self._data_args = data_args
         self._model_args = model_args
         self._training_args = training_args
+        self._recipe_args = recipe_args
 
         self.datasets = {}
         self.trainer = None
@@ -214,7 +223,7 @@ def run_sequential_stages(self, checkpoint: Optional[str] = None):
         :param checkpoint: optional checkpoint to pick up a stage from
         """
 
-        recipe_obj = Recipe.create_instance(self._training_args.recipe)
+        recipe_obj = Recipe.create_instance(self._recipe_args.recipe)
         with self.trainer.accelerator.main_process_first():
             checkpoint_dir = self._model_args.model
             completed_stages = get_completed_stages(checkpoint_dir)
@@ -251,21 +260,30 @@ def run_sequential_stages(self, checkpoint: Optional[str] = None):
 
             # run stage
             if run_type is StageRunType.ONESHOT:
-                self.one_shot(stage=stage_name)
+                from llmcompressor.transformers.calibration import Oneshot
+
+                model = get_session_model()
+                self._model_args.model = model
+
+                oneshot = Oneshot(
+                    output_dir=self._training_args.output_dir,
+                    **get_dataclass_as_dict(self._model_args, ModelArguments),
+                    **get_dataclass_as_dict(self._data_args, DatasetArguments),
+                    **get_dataclass_as_dict(self._recipe_args, RecipeArguments),
+                )
+
+                oneshot.run(stage_name=stage_name)
             elif run_type is StageRunType.TRAIN:
                 self.train(checkpoint=checkpoint, stage=stage_name)
             checkpoint = None
 
-            if (
-                self._training_args.output_dir
-                != TrainingArguments.__dataclass_fields__["output_dir"].default
-            ):
+            if self._training_args.output_dir != DEFAULT_OUTPUT_DIR:
                 save_model_and_recipe(
                     model=self.trainer.model,
                     save_path=self._output_dir,
                     processor=self.processor,
                     save_safetensors=self._training_args.save_safetensors,
-                    save_compressed=self._training_args.save_compressed,
+                    save_compressed=self._model_args.save_compressed,
                 )
 
             # save stage to checkpoint dir
diff --git a/src/llmcompressor/transformers/finetune/session_mixin.py b/src/llmcompressor/transformers/finetune/session_mixin.py
index 27860aeb4..07b9ba1ef 100644
--- a/src/llmcompressor/transformers/finetune/session_mixin.py
+++ b/src/llmcompressor/transformers/finetune/session_mixin.py
@@ -7,13 +7,12 @@
 import torch
 from loguru import logger
 from torch.nn import Module
-from torch.utils.data import DataLoader, IterableDataset
+from torch.utils.data import IterableDataset
 from transformers.trainer_callback import TrainerState
 from transformers.trainer_utils import get_last_checkpoint
 
 from llmcompressor.core import (
     active_session,
-    apply,
     callbacks,
     create_session,
     finalize,
@@ -36,8 +35,10 @@
 from llmcompressor.utils.pytorch import qat_active
 
 if TYPE_CHECKING:
-    from llmcompressor.transformers import DataTrainingArguments
-
+    from llmcompressor.transformers.utils.arg_parser import (
+        DatasetArguments,
+        ModelArguments,
+    )
 
 __all__ = [
     "SessionManagerMixIn",
@@ -68,12 +69,14 @@ def __init__(
         self,
         recipe: Optional[str] = None,
         recipe_args: Optional[Union[Dict[str, Any], str]] = None,
-        data_args: Optional["DataTrainingArguments"] = None,
+        data_args: Optional["DatasetArguments"] = None,
+        model_args: Optional["ModelArguments"] = None,
         teacher: Optional[Union[Module, str]] = None,
         **kwargs,
     ):
         self.recipe = recipe
         self.recipe_args = recipe_args
+        self.model_args = model_args
         self.teacher = teacher
 
         # parse training and metadata args
@@ -374,8 +377,8 @@ def train(self, *args, stage: Optional[str] = None, **kwargs):
         self.initialize_session(epoch=epoch, checkpoint=checkpoint, stage=stage)
 
         # do not save checkpoints as compressed
-        original_save_compressed = self.args.save_compressed
-        self.args.save_compressed = False
+        original_save_compressed = self.model_args.save_compressed
+        self.model_args.save_compressed = False
 
         # train with accelerator
         self.accelerator.wait_for_everyone()
@@ -383,7 +386,7 @@ def train(self, *args, stage: Optional[str] = None, **kwargs):
         self.accelerator.wait_for_everyone()
 
         # restore original setting for saving final model
-        self.args.save_compressed = original_save_compressed
+        self.model_args.save_compressed = original_save_compressed
 
         # lifecycle
         self.finalize_session()
@@ -428,31 +431,6 @@ def predict(self, *args, **kwargs):
 
         return output
 
-    def one_shot(
-        self, calibration_data: Optional[DataLoader] = None, stage: Optional[str] = None
-    ):
-        """
-        Run oneshot calibration on the active model
-
-        :param stage: which stage of the recipe to run, or None to run whole recipe
-        :param calib_data: dataloader of calibration data
-        """
-        apply(
-            recipe=self.recipe,
-            recipe_stage=stage,
-            recipe_args=self.recipe_args,
-            model=self.model,
-            calib_data=calibration_data,
-            start=-1,
-            copy_data=False,
-            accelerator=self.accelerator,
-            min_tokens_per_module=self.min_tokens_per_module,
-        )
-
-        # log model sparsity
-        # self.maybe_log_model_sparsification()
-        self.accelerator.wait_for_everyone()
-
     def save_model(self, output_dir: str, _internal_call=False, _is_oneshot=False):
         """
         Override of the save_model function and expects it to exist in the parent.
@@ -474,7 +452,7 @@ def save_model(self, output_dir: str, _internal_call=False, _is_oneshot=False):
         if not is_fsdp_model(self.model):
             self.model.save_pretrained(
                 output_dir,
-                save_compressed=self.args.save_compressed,
+                save_compressed=self.model_args.save_compressed,
                 safe_serialization=self.args.save_safetensors,
             )
         else:  # FSDP model
@@ -482,7 +460,7 @@ def save_model(self, output_dir: str, _internal_call=False, _is_oneshot=False):
                 model=self.model,
                 accelerator=self.accelerator,
                 output_dir=output_dir,
-                save_compressed=self.args.save_compressed,
+                save_compressed=self.model_args.save_compressed,
                 save_safetensors=self.metadata.get("save_safetensors", False),
             )
 
diff --git a/src/llmcompressor/transformers/finetune/text_generation.py b/src/llmcompressor/transformers/finetune/text_generation.py
index 65747f71d..53b506027 100644
--- a/src/llmcompressor/transformers/finetune/text_generation.py
+++ b/src/llmcompressor/transformers/finetune/text_generation.py
@@ -20,6 +20,7 @@
 import os
 import warnings
 from pathlib import PosixPath
+from typing import Optional
 
 from loguru import logger
 from transformers import (
@@ -39,18 +40,22 @@
     parse_dtype,
 )
 from llmcompressor.recipe import Recipe, StageRunType
-from llmcompressor.transformers.finetune.data.data_args import DataTrainingArguments
-from llmcompressor.transformers.finetune.model_args import ModelArguments
 from llmcompressor.transformers.finetune.runner import StageRunner
 from llmcompressor.transformers.finetune.trainer import Trainer
-from llmcompressor.transformers.finetune.training_args import TrainingArguments
 from llmcompressor.transformers.sparsification.compressed_tensors_utils import (
     modify_fsdp_model_save_pretrained,
     modify_save_pretrained,
     patch_tied_tensors_bug,
 )
 from llmcompressor.transformers.sparsification.sparse_model import (
-    get_shared_processor_src,
+    get_processor_from_model,
+)
+from llmcompressor.transformers.utils.arg_parser import (
+    DEFAULT_OUTPUT_DIR,
+    DatasetArguments,
+    ModelArguments,
+    RecipeArguments,
+    TrainingArguments,
 )
 from llmcompressor.transformers.utils.helpers import detect_last_checkpoint
 from llmcompressor.typing import Processor
@@ -61,27 +66,33 @@ def train(**kwargs):
     """
     CLI entrypoint for running training
     """
-    model_args, data_args, training_args = parse_args(**kwargs)
+    model_args, data_args, recipe_args, training_args, _ = parse_args(
+        include_training_args=True, **kwargs
+    )
     training_args.do_train = True
-    main(model_args, data_args, training_args)
+    main(model_args, data_args, recipe_args, training_args)
 
 
 def eval(**kwargs):
     """
     CLI entrypoint for running evaluation
     """
-    model_args, data_args, training_args = parse_args(**kwargs)
+    model_args, data_args, recipe_args, training_args, _ = parse_args(
+        include_training_args=True, **kwargs
+    )
     training_args.do_eval = True
-    main(model_args, data_args, training_args)
+    main(model_args, data_args, recipe_args, training_args)
 
 
 def oneshot(**kwargs):
+    from llmcompressor.transformers.calibration.oneshot import Oneshot
+
     """
     CLI entrypoint for running oneshot calibration
     """
-    model_args, data_args, training_args = parse_args(**kwargs)
-    training_args.do_oneshot = True
-    main(model_args, data_args, training_args)
+    oneshot = Oneshot(**kwargs)
+    oneshot.run()
+    return oneshot
 
 
 # alias
@@ -93,12 +104,15 @@ def apply(**kwargs):
     CLI entrypoint for any of training, eval, predict or oneshot
     """
     report_to = kwargs.get("report_to", None)
-    model_args, data_args, training_args = parse_args(**kwargs)
+    model_args, data_args, recipe_args, training_args, _ = parse_args(
+        include_training_args=True, **kwargs
+    )
+
     training_args.run_stages = True
     if report_to is None:  # user didn't specify any reporters
         # get rid of the reporters inferred from hugging face
         training_args.report_to = []
-    main(model_args, data_args, training_args)
+    main(model_args, data_args, recipe_args, training_args)
 
 
 def compress(**kwargs):
@@ -107,60 +121,81 @@ def compress(**kwargs):
 
 def load_dataset(dataset_name: str, **kwargs):
     parser = HfArgumentParser(
-        (ModelArguments, DataTrainingArguments, TrainingArguments)
+        (ModelArguments, DatasetArguments, RecipeArguments, TrainingArguments)
     )
-    model_args, data_args, training_args = parser.parse_dict(kwargs)
+    _, data_args, _, _ = parser.parse_dict(kwargs)
     data_args["dataset_name"] = dataset_name
 
 
-def parse_args(**kwargs):
+def parse_args(include_training_args: bool = False, **kwargs):
     """
     Parses kwargs by grouping into model, data or training arg groups:
         * model_args in src/llmcompressor/transformers/finetune/model_args.py
         * data_args in src/llmcompressor/transformers/finetune/data/data_args.py
+        * recipe_args in src/llmcompressor/transformers/utils/recipe_args.py
         * training_args in src/llmcompressor/transformers/finetune/training_args.py
 
-    Throws depreciation warnings
+    Throws deprecation warnings
+
+    :param include_training_args: Add training_args in the output if set to True.
+        Note that instating trainng_args will reset HF accelerator and change its
+        internal state. This dataclass should only be instatiated once to avoid
+        conflict with accelerate library's accelerator.
+
     """
-    parser = HfArgumentParser(
-        (ModelArguments, DataTrainingArguments, TrainingArguments)
-    )
-    if not kwargs:
-        model_args, data_args, training_args = parser.parse_args_into_dataclasses()
+    output_dir = kwargs.pop("output_dir", DEFAULT_OUTPUT_DIR)
+
+    if include_training_args:
+        parser = HfArgumentParser(
+            (ModelArguments, DatasetArguments, RecipeArguments, TrainingArguments)
+        )
     else:
-        model_args, data_args, training_args = parser.parse_dict(kwargs)
+        parser = HfArgumentParser((ModelArguments, DatasetArguments, RecipeArguments))
 
-    if training_args.recipe_args is not None:
-        if not isinstance(training_args.recipe_args, dict):
-            arg_dict = {}
-            for recipe_arg in training_args.recipe_args:
-                key, value = recipe_arg.split("=")
-                arg_dict[key] = value
-            training_args.recipe_args = arg_dict
+    if not kwargs:
+        parsed_args = parser.parse_args_into_dataclasses()
+    else:
+        parsed_args = parser.parse_dict(kwargs)
 
-    # raise depreciation warnings
+    # Unpack parsed arguments based on the presence of training arguments
+    if include_training_args:
+        model_args, data_args, recipe_args, training_args = parsed_args
+        if output_dir is not None:
+            training_args.output_dir = output_dir
+    else:
+        model_args, data_args, recipe_args = parsed_args
+        training_args = None
+
+    if recipe_args.recipe_args is not None:
+        if not isinstance(recipe_args.recipe_args, dict):
+            recipe_args.recipe_args = {
+                key: value
+                for arg in recipe_args.recipe_args
+                for key, value in [arg.split("=")]
+            }
+
+    # Raise deprecation warnings
     if data_args.remove_columns is not None:
         warnings.warn(
-            "`remove_columns` argument is depreciated. When tokenizing datasets, all "
-            "columns which are invalid inputs the tokenizer will be removed",
+            "`remove_columns` argument is deprecated. When tokenizing datasets, all "
+            "columns which are invalid inputs to the tokenizer will be removed.",
             DeprecationWarning,
         )
 
-    # silently assign tokenizer to processor
+    # Silently assign tokenizer to processor
     if model_args.tokenizer:
         if model_args.processor:
-            raise ValueError("Cannot use both a tokenizer and processor")
+            raise ValueError("Cannot use both a tokenizer and processor.")
         model_args.processor = model_args.tokenizer
-    model_args.tokenizer = None
+        model_args.tokenizer = None
 
-    return model_args, data_args, training_args
+    return model_args, data_args, recipe_args, training_args, output_dir
 
 
 def initialize_model_from_path(
     model_args: ModelArguments,
-    training_args: TrainingArguments,
+    training_args: Optional[TrainingArguments] = None,
 ):
-    last_checkpoint = detect_last_checkpoint(training_args, model_args=model_args)
     # Load pretrained model
     # The .from_pretrained methods guarantee that only one local process can
     # concurrently download model & vocab.
@@ -173,16 +208,23 @@ def initialize_model_from_path(
         tie_word_embeddings=model_args.tie_word_embeddings,
         trust_remote_code=model_args.trust_remote_code_model,
     )
-    teacher_config = (
-        AutoConfig.from_pretrained(
-            model_args.distill_teacher,
-            use_auth_token=True if model_args.use_auth_token else None,
-            tie_word_embeddings=model_args.tie_word_embeddings,
-            trust_remote_code=model_args.trust_remote_code_model,
+
+    last_checkpoint = None
+
+    if training_args is not None:
+        teacher_config = (
+            AutoConfig.from_pretrained(
+                model_args.distill_teacher,
+                use_auth_token=True if model_args.use_auth_token else None,
+                tie_word_embeddings=model_args.tie_word_embeddings,
+                trust_remote_code=model_args.trust_remote_code_model,
+            )
+            if model_args.distill_teacher
+            else None
         )
-        if model_args.distill_teacher
-        else None
-    )
+        last_checkpoint = detect_last_checkpoint(training_args, model_args=model_args)
+        # Set seed before initializing model.
+        set_seed(training_args.seed)
 
     model_path = (
         last_checkpoint or model_args.model
@@ -190,21 +232,18 @@ def initialize_model_from_path(
         else model_args.model_name_or_path
     )
 
-    # Set seed before initializing model.
-    set_seed(training_args.seed)
-
     # Fallback to CPU if GPU requested and not available
-    training_args.oneshot_device = fallback_to_cpu(training_args.oneshot_device)
+    model_args.oneshot_device = fallback_to_cpu(model_args.oneshot_device)
 
     # Trainer handles device assignment for FSDP and training, don't do mapping here
     # if running oneshot outside of FSDP, apply user device settings
-    device_map = None
+
     fsdp_enabled = os.environ.get("ACCELERATE_USE_FSDP", "false") == "true"
-    if not fsdp_enabled and training_args.do_oneshot:
-        device_map = training_args.oneshot_device
-        logger.warning(f"Moving {model_path} to device {device_map} for One-Shot")
-    elif not fsdp_enabled:
+
+    device_map = model_args.oneshot_device
+    if not fsdp_enabled and training_args is not None and training_args.do_train:
         device_map = "auto"
+
     model_kwargs = {
         "config": config,
         "cache_dir": model_args.cache_dir,
@@ -214,15 +253,7 @@ def initialize_model_from_path(
         "device_map": device_map,
         "trust_remote_code": model_args.trust_remote_code_model,
     }
-    teacher_device_map = None if fsdp_enabled else "auto"
-    teacher_kwargs = {
-        "config": teacher_config,
-        "cache_dir": model_args.cache_dir,
-        "use_auth_token": True if model_args.use_auth_token else None,
-        "torch_dtype": parse_dtype(model_args.precision),
-        "device_map": teacher_device_map,
-        "trust_remote_code": model_args.trust_remote_code_model,
-    }
+
     # this calls from_pretrained under the hood so should be FSDP safe
     model = AutoModelForCausalLM.from_pretrained(
         model_path,
@@ -231,25 +262,38 @@ def initialize_model_from_path(
     if "sequence_length" in model_kwargs:
         model.seqlen = model_kwargs["sequence_length"]
 
-    teacher = (
-        AutoModelForCausalLM.from_pretrained(
-            model_args.distill_teacher,
-            **teacher_kwargs,
+    teacher = None
+    if training_args is not None:
+        teacher_device_map = None if fsdp_enabled else "auto"
+        teacher_kwargs = {
+            "config": teacher_config,
+            "cache_dir": model_args.cache_dir,
+            "use_auth_token": True if model_args.use_auth_token else None,
+            "torch_dtype": parse_dtype(model_args.precision),
+            "device_map": teacher_device_map,
+            "trust_remote_code": model_args.trust_remote_code_model,
+        }
+
+        teacher = (
+            AutoModelForCausalLM.from_pretrained(
+                model_args.distill_teacher,
+                **teacher_kwargs,
+            )
+            if model_args.distill_teacher is not None
+            else None
         )
-        if model_args.distill_teacher is not None
-        else None
-    )
-    if teacher is not None and "sequence_length" in teacher_kwargs:
-        teacher.seqlen = teacher_kwargs["sequence_length"]
+        if teacher is not None and "sequence_length" in teacher_kwargs:
+            teacher.seqlen = teacher_kwargs["sequence_length"]
 
-    return teacher, model_path, model
+    return model, teacher
 
 
 def initialize_processor_from_path(
-    model_args: ModelArguments, model: PreTrainedModel, teacher: PreTrainedModel
+    model_args: ModelArguments,
+    model: PreTrainedModel,
+    teacher: Optional[PreTrainedModel] = None,
 ) -> Processor:
-    processor_src = model_args.processor
-    processor_src = processor_src or get_shared_processor_src(model, teacher)
+    processor_src = model_args.processor or get_processor_from_model(model, teacher)
     # The use_fast=True option is not currently supported safely in Transformers
     # See: https://github.com/huggingface/transformers/pull/34836#issuecomment-2491809727  # noqa: E501
     try:
@@ -277,7 +321,8 @@ def initialize_processor_from_path(
 
 def main(
     model_args: ModelArguments,
-    data_args: DataTrainingArguments,
+    data_args: DatasetArguments,
+    recipe_args: RecipeArguments,
     training_args: TrainingArguments,
 ):
     """
@@ -312,8 +357,8 @@ def main(
         )
 
     # Setup based on stage types if running stage mode
-    if training_args.run_stages and training_args.recipe is not None:
-        recipe_obj = Recipe.create_instance(training_args.recipe)
+    if training_args.run_stages and recipe_args.recipe is not None:
+        recipe_obj = Recipe.create_instance(recipe_args.recipe)
         for stage in recipe_obj.stages:
             run_type = stage.infer_run_type()
             if run_type is StageRunType.ONESHOT:
@@ -337,7 +382,7 @@ def main(
 
     model = model_args.model
     if isinstance(model, str) or isinstance(model, PosixPath):
-        (teacher, _model_path, model) = initialize_model_from_path(
+        (model, teacher) = initialize_model_from_path(
             model_args,
             training_args,
         )
@@ -360,7 +405,10 @@ def main(
 
     # Load datasets
     stage_runner = StageRunner(
-        model_args=model_args, data_args=data_args, training_args=training_args
+        model_args=model_args,
+        data_args=data_args,
+        training_args=training_args,
+        recipe_args=recipe_args,
     )
     add_labels = training_args.do_train or training_args.run_stages
     stage_runner.populate_datasets(processor=processor, add_labels=add_labels)
@@ -368,13 +416,13 @@ def main(
     eval_dataset = stage_runner.get_dataset_split("validation")
     calib_dataset = stage_runner.get_dataset_split("calibration")
 
-    # Initialize our Trainer
     trainer = Trainer(
         model_init=get_session_model,
         teacher=teacher,
-        recipe=training_args.recipe,
-        recipe_args=training_args.recipe_args,
+        recipe=recipe_args.recipe,
+        recipe_args=recipe_args.recipe_args,
         args=training_args,
+        model_args=model_args,
         data_args=data_args,
         train_dataset=train_dataset or calib_dataset,
         eval_dataset=eval_dataset,
@@ -426,13 +474,13 @@ def main(
         != TrainingArguments.__dataclass_fields__["output_dir"].default
     ):
         model.save_pretrained(
-            training_args.output_dir, save_compressed=training_args.save_compressed
+            training_args.output_dir, save_compressed=model_args.save_compressed
         )
         if processor is not None:
             processor.save_pretrained(training_args.output_dir)
 
     # Clean up the CompressionSession before exit if requested
-    if training_args.clear_sparse_session:
+    if recipe_args.clear_sparse_session:
         reset_session()
 
 
diff --git a/src/llmcompressor/transformers/finetune/training_args.py b/src/llmcompressor/transformers/finetune/training_args.py
deleted file mode 100644
index c04fa2807..000000000
--- a/src/llmcompressor/transformers/finetune/training_args.py
+++ /dev/null
@@ -1,71 +0,0 @@
-from dataclasses import dataclass, field
-from typing import List, Optional
-
-from transformers import TrainingArguments as HFTrainingArgs
-
-__all__ = ["TrainingArguments"]
-
-
-@dataclass
-class TrainingArguments(HFTrainingArgs):
-    """
-    Training arguments specific to LLM Compressor Transformers workflow
-
-    :param best_model_after_epoch (`int`, *optional*, defaults to None):
-        The epoch after which best model will be saved; used in conjunction
-        with `load_best_model_at_end` and `metric_for_best_model` training
-        arguments
-    """
-
-    recipe: Optional[str] = field(
-        default=None,
-        metadata={
-            "help": "Path to a LLM Compressor sparsification recipe",
-        },
-    )
-    recipe_args: Optional[List[str]] = field(
-        default=None,
-        metadata={
-            "help": (
-                "List of recipe arguments to evaluate, of the format key1=value1 "
-                "key2=value2"
-            )
-        },
-    )
-    save_compressed: Optional[bool] = field(
-        default=True,
-        metadata={"help": "Whether to compress sparse models during save"},
-    )
-    do_oneshot: Optional[bool] = field(
-        default=False,
-        metadata={"help": "Whether to run one-shot calibration"},
-    )
-    run_stages: Optional[bool] = field(
-        default=False, metadata={"help": "Whether to trigger recipe stage by stage"}
-    )
-    oneshot_device: Optional[str] = field(
-        default="cuda:0",
-        metadata={"help": "Device to run oneshot calibration on"},
-    )
-    clear_sparse_session: Optional[bool] = field(
-        default=False,
-        metadata={"help": "Whether to clear CompressionSession data between runs."},
-    )
-    save_safetensors: Optional[bool] = field(
-        default=True,
-        metadata={
-            "help": "Use safetensors saving and loading for state dicts instead of "
-            "default torch.load and torch.save."
-        },
-    )
-    output_dir: str = field(
-        default="./output",
-        metadata={
-            "help": "The output directory where the model predictions and "
-            "checkpoints will be written."
-        },
-    )
-
-    @property
-    def place_model_on_device(self):
-        return False
diff --git a/src/llmcompressor/transformers/sparsification/compressed_tensors_utils.py b/src/llmcompressor/transformers/sparsification/compressed_tensors_utils.py
index ec9951f6a..4cae242e5 100644
--- a/src/llmcompressor/transformers/sparsification/compressed_tensors_utils.py
+++ b/src/llmcompressor/transformers/sparsification/compressed_tensors_utils.py
@@ -100,7 +100,9 @@ def save_pretrained_wrapper(
     )
 
 
-def modify_save_pretrained(model: torch.nn.Module):
+def modify_save_pretrained(
+    model: torch.nn.Module,
+):
     """
     Overrides a PreTrainedModel's save_pretrained() method with a wrapped version that
     supports compression
@@ -209,8 +211,9 @@ def skip(*args, **kwargs):
         save_pretrained_wrapper._overriden = True
         return save_pretrained_wrapper
 
-    # wrap save_pretrained
-    model.save_pretrained = save_pretrained_compressed(model.save_pretrained)
+    # wrap save_pretrained if not already
+    if not getattr(model.save_pretrained, "_overriden", False):
+        model.save_pretrained = save_pretrained_compressed(model.save_pretrained)
 
 
 # HACK: Override the dtype_byte_size function in transformers to support float8 types
diff --git a/src/llmcompressor/transformers/sparsification/sparse_model.py b/src/llmcompressor/transformers/sparsification/sparse_model.py
index d7abc323a..57a9dbb78 100644
--- a/src/llmcompressor/transformers/sparsification/sparse_model.py
+++ b/src/llmcompressor/transformers/sparsification/sparse_model.py
@@ -7,7 +7,7 @@
 
 __all__ = [
     "SparseAutoModelForCausalLM",
-    "get_shared_processor_src",
+    "get_processor_from_model",
 ]
 
 
@@ -20,7 +20,7 @@ def from_pretrained(*args, **kwargs):
         return AutoModelForCausalLM.from_pretrained(*args, **kwargs)
 
 
-def get_shared_processor_src(student: Module, teacher: Optional[Module]) -> str:
+def get_processor_from_model(student: Module, teacher: Optional[Module]) -> str:
     """
     Get a processor/tokenizer source used for both student and teacher, assuming
     that they could be shared
diff --git a/src/llmcompressor/transformers/utils/arg_parser/__init__.py b/src/llmcompressor/transformers/utils/arg_parser/__init__.py
new file mode 100644
index 000000000..cbb9224af
--- /dev/null
+++ b/src/llmcompressor/transformers/utils/arg_parser/__init__.py
@@ -0,0 +1,6 @@
+# flake8: noqa
+
+from .data_arguments import DatasetArguments
+from .model_arguments import ModelArguments
+from .recipe_arguments import RecipeArguments
+from .training_arguments import DEFAULT_OUTPUT_DIR, TrainingArguments
diff --git a/src/llmcompressor/transformers/finetune/data/data_args.py b/src/llmcompressor/transformers/utils/arg_parser/data_arguments.py
similarity index 97%
rename from src/llmcompressor/transformers/finetune/data/data_args.py
rename to src/llmcompressor/transformers/utils/arg_parser/data_arguments.py
index 7d0bc14ce..50d3277f4 100644
--- a/src/llmcompressor/transformers/finetune/data/data_args.py
+++ b/src/llmcompressor/transformers/utils/arg_parser/data_arguments.py
@@ -5,7 +5,7 @@
 
 
 @dataclass
-class DVCDatasetTrainingArguments:
+class DVCDatasetArguments:
     """
     Arguments for training using DVC
     """
@@ -17,7 +17,7 @@ class DVCDatasetTrainingArguments:
 
 
 @dataclass
-class CustomDataTrainingArguments(DVCDatasetTrainingArguments):
+class CustomDatasetArguments(DVCDatasetArguments):
     """
     Arguments for training using custom datasets
     """
@@ -67,10 +67,10 @@ class CustomDataTrainingArguments(DVCDatasetTrainingArguments):
 
 
 @dataclass
-class DataTrainingArguments(CustomDataTrainingArguments):
+class DatasetArguments(CustomDatasetArguments):
     """
     Arguments pertaining to what data we are going to input our model for
-    training and eval
+    calibration, training or eval
 
     Using `HfArgumentParser` we can turn this class into argparse
     arguments to be able to specify them on the command line
diff --git a/src/llmcompressor/transformers/finetune/model_args.py b/src/llmcompressor/transformers/utils/arg_parser/model_arguments.py
similarity index 85%
rename from src/llmcompressor/transformers/finetune/model_args.py
rename to src/llmcompressor/transformers/utils/arg_parser/model_arguments.py
index c81900ee2..ce424812a 100644
--- a/src/llmcompressor/transformers/finetune/model_args.py
+++ b/src/llmcompressor/transformers/utils/arg_parser/model_arguments.py
@@ -5,7 +5,9 @@
 @dataclass
 class ModelArguments:
     """
-    Arguments pertaining to which model/config/tokenizer we are going to fine-tune from
+    Model variables used for oneshot calibration, training or finetuning and
+    stage runners (combination of oneshot and finetune going back and forth)
+
     """
 
     model: str = field(
@@ -44,17 +46,7 @@ class ModelArguments:
         default=None,
         metadata={"help": "Where to store the pretrained data from huggingface.co"},
     )
-    use_fast_tokenizer: bool = field(
-        default=True,
-        metadata={"help": "Whether to use one of the fast tokenizers. Default True"},
-    )
-    model_revision: str = field(
-        default="main",
-        metadata={
-            "help": "The specific model version to use "
-            "(can be a branch name, tag name or commit id)"
-        },
-    )
+
     use_auth_token: bool = field(
         default=False,
         metadata={
@@ -83,3 +75,18 @@ class ModelArguments:
             "repositories you trust and in which you have read the code"
         },
     )
+    save_compressed: Optional[bool] = field(
+        default=True,
+        metadata={"help": "Whether to compress sparse models during save"},
+    )
+    oneshot_device: Optional[str] = field(
+        default="cuda:0",
+        metadata={"help": "Device to run oneshot calibration on"},
+    )
+    model_revision: str = field(
+        default="main",
+        metadata={
+            "help": "The specific model version to use "
+            "(can be a branch name, tag name or commit id)"
+        },
+    )
diff --git a/src/llmcompressor/transformers/utils/arg_parser/recipe_arguments.py b/src/llmcompressor/transformers/utils/arg_parser/recipe_arguments.py
new file mode 100644
index 000000000..fbe535d7e
--- /dev/null
+++ b/src/llmcompressor/transformers/utils/arg_parser/recipe_arguments.py
@@ -0,0 +1,32 @@
+from dataclasses import dataclass, field
+from typing import List, Optional
+
+
+@dataclass
+class RecipeArguments:
+    """Recipe and session variables"""
+
+    recipe: Optional[str] = field(
+        default=None,
+        metadata={
+            "help": "Path to a LLM Compressor sparsification recipe",
+        },
+    )
+    recipe_args: Optional[List[str]] = field(
+        default=None,
+        metadata={
+            "help": (
+                "List of recipe arguments to evaluate, of the format key1=value1 "
+                "key2=value2"
+            )
+        },
+    )
+    clear_sparse_session: Optional[bool] = field(
+        default=False,
+        metadata={
+            "help": (
+                "Whether to clear CompressionSession/CompressionLifecycle ",
+                "data between runs.",
+            )
+        },
+    )
diff --git a/src/llmcompressor/transformers/utils/arg_parser/training_arguments.py b/src/llmcompressor/transformers/utils/arg_parser/training_arguments.py
new file mode 100644
index 000000000..7b61193b0
--- /dev/null
+++ b/src/llmcompressor/transformers/utils/arg_parser/training_arguments.py
@@ -0,0 +1,32 @@
+from dataclasses import dataclass, field
+from typing import Optional
+
+from transformers import TrainingArguments as HFTrainingArgs
+
+__all__ = ["TrainingArguments", "DEFAULT_OUTPUT_DIR"]
+
+DEFAULT_OUTPUT_DIR = "./output"
+
+
+@dataclass
+class TrainingArguments(HFTrainingArgs):
+    """
+    Training arguments specific to LLM Compressor Transformers workflow using
+    HFTrainingArgs as base class
+
+    """
+
+    do_oneshot: Optional[bool] = field(
+        default=False,
+        metadata={"help": "Whether to run one-shot calibration in stages"},
+    )
+    run_stages: Optional[bool] = field(
+        default=False, metadata={"help": "Whether to trigger recipe stage by stage"}
+    )
+    output_dir: str = field(
+        default=DEFAULT_OUTPUT_DIR,
+        metadata={
+            "help": "The output directory where the model predictions and "
+            "checkpoints will be written."
+        },
+    )
diff --git a/src/llmcompressor/transformers/utils/arg_parser/utils.py b/src/llmcompressor/transformers/utils/arg_parser/utils.py
new file mode 100644
index 000000000..48455fa15
--- /dev/null
+++ b/src/llmcompressor/transformers/utils/arg_parser/utils.py
@@ -0,0 +1,30 @@
+from dataclasses import fields
+from typing import Any, Dict, Union
+
+from .data_arguments import DatasetArguments
+from .model_arguments import ModelArguments
+from .recipe_arguments import RecipeArguments
+from .training_arguments import TrainingArguments
+
+__all__ = [
+    "get_dataclass_as_dict",
+]
+
+
+def get_dataclass_as_dict(
+    dataclass_instance: Union[
+        "ModelArguments", "RecipeArguments", "DatasetArguments", "TrainingArguments"
+    ],
+    dataclass_class: Union[
+        "ModelArguments", "RecipeArguments", "DatasetArguments", "TrainingArguments"
+    ],
+) -> Dict[str, Any]:
+    """
+    Get the dataclass instance attributes as a dict, neglicting the inherited class.
+    Ex. dataclass_class=TrainingArguments will ignore HFTrainignArguments
+
+    """
+    return {
+        field.name: getattr(dataclass_instance, field.name)
+        for field in fields(dataclass_class)
+    }
diff --git a/src/llmcompressor/transformers/utils/helpers.py b/src/llmcompressor/transformers/utils/helpers.py
index 1263bb004..7f17e6a6c 100644
--- a/src/llmcompressor/transformers/utils/helpers.py
+++ b/src/llmcompressor/transformers/utils/helpers.py
@@ -10,7 +10,10 @@
 from transformers.trainer_utils import get_last_checkpoint
 
 if TYPE_CHECKING:
-    from llmcompressor.transformers import ModelArguments, TrainingArguments
+    from llmcompressor.transformers.utils.arg_parser import (
+        ModelArguments,
+        TrainingArguments,
+    )
 
 __all__ = [
     "RECIPE_FILE_NAME",
diff --git a/tests/llmcompressor/transformers/compression/test_quantization.py b/tests/llmcompressor/transformers/compression/test_quantization.py
index 13eab66c9..e1765d37a 100644
--- a/tests/llmcompressor/transformers/compression/test_quantization.py
+++ b/tests/llmcompressor/transformers/compression/test_quantization.py
@@ -59,10 +59,9 @@ def _run_oneshot(model, recipe, dataset, output_dir):
         max_seq_length = 512
         pad_to_max_length = False
 
-        oneshot(
+        oneshot_run = oneshot(
             model=model,
             dataset=dataset,
-            overwrite_output_dir=True,
             output_dir=output_dir,
             max_seq_length=max_seq_length,
             num_calibration_samples=num_calibration_samples,
@@ -72,10 +71,8 @@ def _run_oneshot(model, recipe, dataset, output_dir):
             splits={"calibration": "train_gen[:5%]"},
             save_compressed=False,
         )
-        from llmcompressor.pytorch.model_load.helpers import get_session_model
 
-        # note: get_session_model() is None outside of function scope
-        return get_session_model()
+        return oneshot_run.model
 
     def _get_quant_info(self, model):
         quant_info_weights = {}
diff --git a/tests/llmcompressor/transformers/finetune/data/test_dataset_loading.py b/tests/llmcompressor/transformers/finetune/data/test_dataset_loading.py
index 64514b252..137da558e 100644
--- a/tests/llmcompressor/transformers/finetune/data/test_dataset_loading.py
+++ b/tests/llmcompressor/transformers/finetune/data/test_dataset_loading.py
@@ -15,6 +15,7 @@
     format_calibration_data,
 )
 from llmcompressor.transformers.finetune.runner import StageRunner
+from llmcompressor.transformers.utils.recipe_args import RecipeArguments
 
 
 @pytest.mark.unit
@@ -283,8 +284,12 @@ def test_split_loading(self, split_def):
         )
         training_args = TrainingArguments(do_train=True, output_dir="dummy")
         model_args = ModelArguments(model=None)
+        recipe_args = RecipeArguments()
         stage_runner = StageRunner(
-            model_args=model_args, data_args=data_args, training_args=training_args
+            model_args=model_args,
+            data_args=data_args,
+            training_args=training_args,
+            recipe_args=recipe_args,
         )
         stage_runner.populate_datasets(processor=self.tiny_llama_tokenizer)
 
@@ -322,6 +327,7 @@ def preprocess(sample):
                 dataset=tokenized_dataset, shuffle_calibration_samples=False
             ),
             training_args=TrainingArguments(do_oneshot=True),
+            recipe_args=RecipeArguments(),
         )
         stage_runner.populate_datasets(processor=None)
         calib_dataset = stage_runner.get_dataset_split("calibration")
diff --git a/tests/llmcompressor/transformers/finetune/test_oneshot_and_finetune.py b/tests/llmcompressor/transformers/finetune/test_oneshot_and_finetune.py
index 870503496..33311a536 100644
--- a/tests/llmcompressor/transformers/finetune/test_oneshot_and_finetune.py
+++ b/tests/llmcompressor/transformers/finetune/test_oneshot_and_finetune.py
@@ -23,6 +23,8 @@ def _test_oneshot_and_finetune(self):
         if self.dataset == "ultrachat-200k":
             splits = {"train": "train_gen[:30%]", "calibration": "train_gen[30%:40%]"}
 
+        shutil.rmtree(self.output)
+
         apply(
             model=self.model,
             dataset=self.dataset,
@@ -53,7 +55,8 @@ def _test_oneshot_and_finetune(self):
     def tearDown(self):
         # TODO: we get really nice stats from finetune that we should log
         # stored in results.json
-        shutil.rmtree(self.output)
+        # shutil.rmtree(self.output)
+        pass
 
 
 @pytest.mark.integration
diff --git a/tests/llmcompressor/transformers/gptq/test_oneshot.py b/tests/llmcompressor/transformers/gptq/test_oneshot.py
index 7f1a1ec99..d75386b94 100644
--- a/tests/llmcompressor/transformers/gptq/test_oneshot.py
+++ b/tests/llmcompressor/transformers/gptq/test_oneshot.py
@@ -75,7 +75,6 @@ def test_oneshot_application(self):
             model=self.model,
             dataset=self.dataset,
             output_dir=self.output,
-            overwrite_output_dir=True,
             recipe=self.recipe,
             oneshot_device=self.device,
             num_calibration_samples=9,
diff --git a/tests/llmcompressor/transformers/obcq/test_consecutive_runs.py b/tests/llmcompressor/transformers/obcq/test_consecutive_runs.py
index 2f6c51ebb..5d2bafdc3 100644
--- a/tests/llmcompressor/transformers/obcq/test_consecutive_runs.py
+++ b/tests/llmcompressor/transformers/obcq/test_consecutive_runs.py
@@ -6,6 +6,7 @@
 import yaml
 from parameterized import parameterized_class
 
+from llmcompressor.core import active_session
 from tests.testing_utils import parse_params, requires_gpu
 
 CONFIGS_DIRECTORY = "tests/llmcompressor/transformers/obcq/obcq_configs/consec_runs"
@@ -20,14 +21,12 @@ def _test_consecutive_runs(
     ):
         import math
 
-        from llmcompressor.core import active_session
-        from llmcompressor.pytorch.model_load.helpers import get_session_model
         from llmcompressor.pytorch.utils.helpers import tensor_sparsity
         from llmcompressor.transformers import oneshot
         from llmcompressor.utils.pytorch import qat_active
 
         # test recipe with 50% sparsity, quantization and smoothquant
-        oneshot(
+        oneshot = oneshot(
             model=self.model,
             dataset=self.dataset,
             num_calibration_samples=num_calibration_samples,
@@ -36,7 +35,7 @@ def _test_consecutive_runs(
             oneshot_device=self.device,
             clear_sparse_session=False,
         )
-        first_tiny_model = get_session_model()
+        first_tiny_model = oneshot.model
         layer_0_sparse = tensor_sparsity(
             first_tiny_model.model.layers[0].self_attn.k_proj.weight
         )
@@ -47,10 +46,9 @@ def _test_consecutive_runs(
         session_recipe = session.lifecycle.recipe_container.compiled_recipe
         stages = [stage.group for stage in session_recipe.stages]
         self.assertEqual(len(stages), 1)
-        session.reset()
 
         # reload saved model and up sparsity to 0.7
-        oneshot(
+        second_oneshot = oneshot(
             model=self.output_first,
             dataset=self.dataset,
             num_calibration_samples=num_calibration_samples,
@@ -60,7 +58,7 @@ def _test_consecutive_runs(
             clear_sparse_session=False,
         )
 
-        second_tiny_model = get_session_model()
+        second_tiny_model = second_oneshot.model
         layer_0_sparse = tensor_sparsity(
             second_tiny_model.model.layers[0].self_attn.k_proj.weight
         )
diff --git a/tests/llmcompressor/transformers/obcq/test_mask_structure_preservation.py b/tests/llmcompressor/transformers/obcq/test_mask_structure_preservation.py
index 5095fe827..a48f0d8d1 100644
--- a/tests/llmcompressor/transformers/obcq/test_mask_structure_preservation.py
+++ b/tests/llmcompressor/transformers/obcq/test_mask_structure_preservation.py
@@ -5,7 +5,6 @@
 from compressed_tensors.utils import tensor_follows_mask_structure
 from parameterized import parameterized_class
 
-from llmcompressor.core import reset_session
 from tests.testing_utils import parse_params
 
 MASK_STRUCTURE_CONFIGS_DIRECTORY = (
@@ -47,7 +46,6 @@ def test_mask_structure_preserved(self):
 
         import torch
 
-        from llmcompressor.pytorch.model_load.helpers import get_session_model
         from llmcompressor.pytorch.utils.helpers import tensor_sparsity
         from llmcompressor.transformers import oneshot
         from llmcompressor.utils.pytorch import qat_active
@@ -55,7 +53,7 @@ def test_mask_structure_preserved(self):
         tolerance = 1e-3
         num_calibration_samples = 16
 
-        oneshot(
+        oneshot = oneshot(
             model=self.model,
             dataset=self.dataset,
             num_calibration_samples=num_calibration_samples,
@@ -65,7 +63,7 @@ def test_mask_structure_preserved(self):
             clear_sparse_session=False,
             save_compressed=False,
         )
-        first_tiny_model = get_session_model()
+        first_tiny_model = oneshot.model
         targetted_layer = first_tiny_model.model.layers[0].self_attn.k_proj
         target_layer_sparsity = tensor_sparsity(targetted_layer.weight)
         initial_mask = first_tiny_model.model.layers[0].self_attn.k_proj.weight == 0
@@ -77,9 +75,7 @@ def test_mask_structure_preserved(self):
         # mask structure is as expected, i.e same as self.recipe_mask_structure
         assert tensor_follows_mask_structure(initial_mask, self.recipe_mask_structure)
 
-        reset_session()
-
-        oneshot(
+        second_oneshot = oneshot(
             model=self.output_first,
             dataset=self.dataset,
             num_calibration_samples=num_calibration_samples,
@@ -90,7 +86,7 @@ def test_mask_structure_preserved(self):
             save_compressed=False,
         )
 
-        second_tiny_model = get_session_model()
+        second_tiny_model = second_oneshot.model
 
         # model is loaded
         assert second_tiny_model is not None
diff --git a/tests/llmcompressor/transformers/obcq/test_obcq_sparsity.py b/tests/llmcompressor/transformers/obcq/test_obcq_sparsity.py
index 0ef7f872d..f370d5ee1 100644
--- a/tests/llmcompressor/transformers/obcq/test_obcq_sparsity.py
+++ b/tests/llmcompressor/transformers/obcq/test_obcq_sparsity.py
@@ -26,11 +26,10 @@ def setUp(self):
         self.output = "./oneshot_output"
 
     def test_sparsities(self):
-        from llmcompressor.pytorch.model_load.helpers import get_session_model
         from llmcompressor.pytorch.utils.helpers import tensor_sparsity
         from llmcompressor.transformers import oneshot
 
-        oneshot(
+        oneshot = oneshot(
             model=self.model,
             dataset=self.dataset,
             oneshot_device=self.device,
@@ -42,7 +41,7 @@ def test_sparsities(self):
             output_dir=self.output,
         )
 
-        model = get_session_model()
+        model = oneshot.model
 
         layer_1_sparse = tensor_sparsity(model.model.layers[1].self_attn.k_proj.weight)
         assert math.isclose(layer_1_sparse.item(), self.sparsity, rel_tol=1e-4)
diff --git a/tests/llmcompressor/transformers/oneshot/test_cli.py b/tests/llmcompressor/transformers/oneshot/test_cli.py
index 5780ca46f..803d624a3 100644
--- a/tests/llmcompressor/transformers/oneshot/test_cli.py
+++ b/tests/llmcompressor/transformers/oneshot/test_cli.py
@@ -41,16 +41,20 @@ def test_one_shot_cli(self):
             "--recipe",
             self.recipe,
             "--num_calibration_samples",
-            "10",
+            "16",
             "--pad_to_max_length",
             "False",
         ]
 
         if len(self.additional_args) > 0:
             cmd.extend(self.additional_args)
+
         res = run_cli_command(cmd)
-        self.assertEqual(res.returncode, 0)
-        print(res.stdout)
+
+        # oneshot has return arg
+        self.assertIsNone(res.stderr)
 
     def tearDown(self):
-        shutil.rmtree(self.output)
+        # if a test case was skipped
+        if hasattr(self, "output"):
+            shutil.rmtree(self.output)
diff --git a/tests/llmcompressor/transformers/sparsification/test_compress_tensor_utils.py b/tests/llmcompressor/transformers/sparsification/test_compress_tensor_utils.py
index 92e600de9..2d972970e 100644
--- a/tests/llmcompressor/transformers/sparsification/test_compress_tensor_utils.py
+++ b/tests/llmcompressor/transformers/sparsification/test_compress_tensor_utils.py
@@ -90,7 +90,7 @@ def test_sparse_model_reload(compressed, config, dtype, tmp_path):
         rel_tol=1e-3,
     )
 
-    inferred_structure = SparsityConfigMetadata.infer_sparsity_structure()
+    inferred_structure = SparsityConfigMetadata.infer_sparsity_structure(model)
     assert inferred_structure == "0:0"
 
     model.save_pretrained(
@@ -167,8 +167,6 @@ def test_dense_model_save(tmp_path, skip_compression_stats, save_compressed):
     ],
 )
 def test_quant_model_reload(format, dtype, tmp_path):
-    from llmcompressor.pytorch.model_load.helpers import get_session_model
-
     recipe_str = (
         "tests/llmcompressor/transformers/compression/recipes/new_quant_simple.yaml"
     )
@@ -182,7 +180,7 @@ def test_quant_model_reload(format, dtype, tmp_path):
     splits = {"calibration": "train[:10%]"}
 
     # create a quantized model
-    oneshot(
+    oneshot_run = oneshot(
         model=model_path,
         dataset=dataset,
         num_calibration_samples=num_calibration_samples,
@@ -195,7 +193,7 @@ def test_quant_model_reload(format, dtype, tmp_path):
     )
 
     # Fetch the oneshot model
-    model = get_session_model()
+    model = oneshot_run.model
     og_state_dict = model.state_dict()
     save_path_compressed = tmp_path / "compressed"