From ccb0bc2998f35ede3118b490e0ce387244898305 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20P=2E=20D=C3=BCrholt?= Date: Wed, 28 Feb 2024 16:42:09 +0100 Subject: [PATCH] Add Task Feature (#360) --- bofire/data_models/domain/features.py | 22 ++++++++- bofire/data_models/features/api.py | 3 ++ bofire/data_models/features/categorical.py | 3 +- bofire/data_models/features/continuous.py | 3 +- bofire/data_models/features/task.py | 28 +++++++++++ .../bofire/data_models/features/test_task.py | 9 ++++ tests/bofire/data_models/specs/features.py | 47 +++++++++++++++++++ tests/bofire/data_models/specs/inputs.py | 27 +++++++++++ 8 files changed, 136 insertions(+), 6 deletions(-) create mode 100644 bofire/data_models/features/task.py create mode 100644 tests/bofire/data_models/features/test_task.py diff --git a/bofire/data_models/domain/features.py b/bofire/data_models/domain/features.py index 9ff8a80c9..c53720aa9 100644 --- a/bofire/data_models/domain/features.py +++ b/bofire/data_models/domain/features.py @@ -27,6 +27,7 @@ Input, MolecularInput, Output, + TaskInput, ) from bofire.data_models.filters import filter_by_attribute, filter_by_class from bofire.data_models.molfeatures.api import MolFeatures @@ -181,6 +182,19 @@ class Inputs(Features): type: Literal["Inputs"] = "Inputs" features: Sequence[AnyInput] = Field(default_factory=lambda: []) + @field_validator("features") + @classmethod + def validate_only_one_task_input(cls, features: Sequence[AnyInput]): + filtered = filter_by_class( + features, + includes=TaskInput, + excludes=None, + exact=False, + ) + if len(filtered) > 1: + raise ValueError(f"Only one `TaskInput` is allowed, got {len(filtered)}.") + return features + def get_fixed(self) -> "Inputs": """Gets all features in `self` that are fixed and returns them as new `Inputs` object. @@ -702,7 +716,10 @@ def __call__( ] + [ ( - pd.Series(data=feat(experiments.filter(regex=f"{feat.key}(.*)_prob")), name=f"{feat.key}_pred") # type: ignore + pd.Series( + data=feat(experiments.filter(regex=f"{feat.key}(.*)_prob")), + name=f"{feat.key}_pred", + ) # type: ignore if predictions else experiments[feat.key] ) @@ -766,7 +783,8 @@ def validate_candidates(self, candidates: pd.DataFrame) -> pd.DataFrame: + [ [f"{key}_pred", f"{key}_sd"] for key in self.get_keys_by_objective( - excludes=Objective, includes=None # type: ignore + excludes=Objective, + includes=None, # type: ignore ) ] ) diff --git a/bofire/data_models/features/api.py b/bofire/data_models/features/api.py index 5dc9b7bd8..ad46392cb 100644 --- a/bofire/data_models/features/api.py +++ b/bofire/data_models/features/api.py @@ -13,6 +13,7 @@ MolecularInput, ) from bofire.data_models.features.numerical import NumericalInput +from bofire.data_models.features.task import TaskInput AbstractFeature = Union[ Feature, @@ -32,6 +33,7 @@ CategoricalDescriptorInput, MolecularInput, CategoricalMolecularInput, + TaskInput, ] AnyInput = Union[ @@ -42,6 +44,7 @@ CategoricalDescriptorInput, MolecularInput, CategoricalMolecularInput, + TaskInput, ] AnyOutput = Union[ContinuousOutput, CategoricalOutput] diff --git a/bofire/data_models/features/categorical.py b/bofire/data_models/features/categorical.py index 6e93e1352..c2f52843b 100644 --- a/bofire/data_models/features/categorical.py +++ b/bofire/data_models/features/categorical.py @@ -332,8 +332,7 @@ def __str__(self) -> str: class CategoricalOutput(Output): type: Literal["CategoricalOutput"] = "CategoricalOutput" - # order_id: ClassVar[int] = 8 - order_id: ClassVar[int] = 9 + order_id: ClassVar[int] = 10 categories: TCategoryVals objective: AnyCategoricalObjective diff --git a/bofire/data_models/features/continuous.py b/bofire/data_models/features/continuous.py index 3779ceb2e..21585f5c5 100644 --- a/bofire/data_models/features/continuous.py +++ b/bofire/data_models/features/continuous.py @@ -186,8 +186,7 @@ class ContinuousOutput(Output): """ type: Literal["ContinuousOutput"] = "ContinuousOutput" - # order_id: ClassVar[int] = 7 - order_id: ClassVar[int] = 8 + order_id: ClassVar[int] = 9 unit: Optional[str] = None objective: Optional[AnyObjective] = Field( diff --git a/bofire/data_models/features/task.py b/bofire/data_models/features/task.py new file mode 100644 index 000000000..86cf25854 --- /dev/null +++ b/bofire/data_models/features/task.py @@ -0,0 +1,28 @@ +from typing import ClassVar, List, Literal + +import numpy as np +from pydantic import model_validator + +from bofire.data_models.features.categorical import CategoricalInput + + +class TaskInput(CategoricalInput): + order_id: ClassVar[int] = 8 + type: Literal["TaskInput"] = "TaskInput" + fidelities: List[int] = [] + + @model_validator(mode="after") + def validate_fidelities(self): + n_tasks = len(self.categories) + if self.fidelities == []: + for _ in range(n_tasks): + self.fidelities.append(0) + if len(self.fidelities) != n_tasks: + raise ValueError( + "Length of fidelity lists must be equal to the number of tasks" + ) + if list(set(self.fidelities)) != list(range(np.max(self.fidelities) + 1)): + raise ValueError( + "Fidelities must be a list containing integers, starting from 0 and increasing by 1" + ) + return self diff --git a/tests/bofire/data_models/features/test_task.py b/tests/bofire/data_models/features/test_task.py new file mode 100644 index 000000000..df32452b5 --- /dev/null +++ b/tests/bofire/data_models/features/test_task.py @@ -0,0 +1,9 @@ +from bofire.data_models.features.api import TaskInput + + +def test_validate_fidelities_default_generation(): + feat = TaskInput( + key="task", + categories=["p1", "p2"], + ) + assert feat.fidelities == [0, 0] diff --git a/tests/bofire/data_models/specs/features.py b/tests/bofire/data_models/specs/features.py index 2ac45ea00..6889b09c2 100644 --- a/tests/bofire/data_models/specs/features.py +++ b/tests/bofire/data_models/specs/features.py @@ -153,3 +153,50 @@ "allowed": [True, True, True, True], }, ) + + +specs.add_valid( + features.TaskInput, + lambda: { + "key": str(uuid.uuid4()), + "categories": [ + "process_1", + "process_2", + "process_3", + ], + "allowed": [True, True, True], + "fidelities": [0, 1, 2], + }, +) + +specs.add_invalid( + features.TaskInput, + lambda: { + "key": str(uuid.uuid4()), + "categories": [ + "process_1", + "process_2", + "process_3", + ], + "allowed": [True, True, True], + "fidelities": [0, 1], + }, + error=ValueError, + message="Length of fidelity lists must be equal to the number of tasks", +) + +specs.add_invalid( + features.TaskInput, + lambda: { + "key": str(uuid.uuid4()), + "categories": [ + "process_1", + "process_2", + "process_3", + ], + "allowed": [True, True, True], + "fidelities": [0, 1, 3], + }, + error=ValueError, + message="Fidelities must be a list containing integers, starting from 0 and increasing by 1", +) diff --git a/tests/bofire/data_models/specs/inputs.py b/tests/bofire/data_models/specs/inputs.py index e21e8b70c..7a0d82897 100644 --- a/tests/bofire/data_models/specs/inputs.py +++ b/tests/bofire/data_models/specs/inputs.py @@ -3,6 +3,7 @@ CategoricalInput, ContinuousInput, ContinuousOutput, + TaskInput, ) from tests.bofire.data_models.specs.specs import Specs @@ -20,6 +21,19 @@ }, ) +specs.add_valid( + Inputs, + lambda: { + "features": [ + CategoricalInput( + key="a", categories=["1", "2"], allowed=[True, True] + ).model_dump(), + ContinuousInput(key="b", bounds=(0, 1)).model_dump(), + TaskInput(key="c", categories=["a", "b", "c"]).model_dump(), + ], + }, +) + specs.add_invalid( Inputs, @@ -43,3 +57,16 @@ error=ValueError, message="Feature keys are not unique.", ) + +specs.add_invalid( + Inputs, + lambda: { + "features": [ + CategoricalInput(key="a", categories=["1", "2"]), + TaskInput(key="b", categories=["a", "b", "c"]), + TaskInput(key="c", categories=["a", "b", "c"]), + ], + }, + error=ValueError, + message="Only one `TaskInput` is allowed, got 2.", +)