Skip to content

Commit

Permalink
Merge pull request #6 from Shekswess/develop
Browse files (browse the repository at this point in the history)
[Fix] Updating SynthGenAI with improvements for batch generation and setting default language
  • Loading branch information
Shekswess authored Jan 4, 2025
2 parents ca04d98 + 3e25105 commit 7494d4e
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 3 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "synthgenai"
-version = "0.2.0"
+version = "0.2.1"
description = "SynthGenAI - Package for generating Synthetic Datasets."
readme = "README.md"
requires-python = ">=3.10"
Expand Down
3 changes: 2 additions & 1 deletion synthgenai/data_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ class DatasetType(str, Enum):
INSTRUCTION = "Instruction Dataset"
PREFERENCE = "Preference Dataset"
SUMMARIZATION = "Summarization Dataset"
+SENTIMENT_ANALYSIS = "Sentiment Analysis Dataset"


class LLMConfig(BaseModel):
Expand Down Expand Up @@ -50,7 +51,7 @@ class DatasetConfig(BaseModel):

topic: str = Field(..., min_length=1)
domains: list[str] = Field(..., min_items=1)
-language: str = Field(..., min_length=1)
+language: str = Field("English", min_length=1)
additional_description: str = Field("", max_length=1000)
num_entries: int = Field(1000, gt=1)

Expand Down
3 changes: 2 additions & 1 deletion synthgenai/dataset_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,8 @@ async def _agenerate_entries(self):
batch_keywords = keywords[i : i + BATCH_SIZE]
tasks = [self._agenerate_entry(keyword) for keyword in batch_keywords]
entries = await asyncio.gather(*tasks)
-time.sleep(10)
+random_wait_time = random.randint(10, 20)
+time.sleep(random_wait_time)
for keyword, entry in zip(batch_keywords, entries):
if entry:
data.append(entry)
Expand Down

0 comments on commit 7494d4e

Please sign in to comment.