Skip to content

Commit

Permalink
.
Browse files Browse the repository at this point in the history
  • Loading branch information
garrett4wade committed Sep 3, 2024
1 parent bcb4c99 commit 5e4b8c4
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions realhf/experiments/common/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import functools
import itertools
import pprint
import re
from collections import defaultdict
from typing import *

Expand Down Expand Up @@ -150,6 +151,9 @@ class CommonExperimentConfig(Experiment):
:type nodelist: str or None
:param seed: Random seed.
:type seed: int
:param cache_clear_freq: The data transfer cache is cleared every ``cache_clear_freq`` steps.
If None, the cache is never cleared. Set it to a small number, e.g., 1, if OOM or CUDA OOM occurs.
:type cache_clear_freq: int or None
:param exp_ctrl: The save and evaluation control of the experiment.
:type exp_ctrl: ExperimentSaveEvalControl
"""
Expand All @@ -172,6 +176,7 @@ class CommonExperimentConfig(Experiment):
n_gpus_per_node: int = 8
nodelist: Optional[str] = None
seed: int = 1
cache_clear_freq: Optional[int] = 10
exp_ctrl: ExperimentSaveEvalControl = dataclasses.field(
default_factory=ExperimentSaveEvalControl
)
Expand Down Expand Up @@ -413,8 +418,8 @@ def _get_model_worker_configs(
seed=self.seed,
shards=[],
datasets=self.datasets,
cuda_cache_cleanliness=False,
cuda_cache_clear_freq=10,
cuda_cache_cleanliness=self.cache_clear_freq is not None,
cuda_cache_clear_freq=self.cache_clear_freq,
tokenizer_name_or_path=self.tokenizer_name_or_path,
)

Expand Down

0 comments on commit 5e4b8c4

Please sign in to comment.