Skip to content

Commit

Permalink
tests
Browse files Browse the repository at this point in the history
  • Loading branch information
R-Palazzo committed Jan 23, 2025
1 parent d6f6b5c commit 5d7ba7c
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 3 deletions.
33 changes: 33 additions & 0 deletions tests/integration/metadata/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
import re
from copy import deepcopy

import pandas as pd
import pytest

from sdv.datasets.demo import download_demo
from sdv.metadata.errors import InvalidMetadataError
from sdv.metadata.metadata import Metadata
from sdv.metadata.multi_table import MultiTableMetadata
from sdv.metadata.single_table import SingleTableMetadata
Expand Down Expand Up @@ -520,3 +522,34 @@ def test_anonymize():

assert anonymized.tables['table1'].to_dict() == table1_metadata.anonymize().to_dict()
assert anonymized.tables['table2'].to_dict() == table2_metadata.anonymize().to_dict()


def test_detect_from_dataframes_invalid_format():
    """Check ``detect_from_dataframes`` when a column holds an invalid data format.

    One table contains a column whose values are ``dict`` objects. Detection is
    expected to raise an ``InvalidMetadataError`` whose message names the table
    and the column and includes the underlying ``TypeError`` text.
    """
    # Setup
    n_rows = 100
    rows_of_dicts = [
        {'key1': idx, 'key2': f'string_{idx}', 'key3': 1.5} for idx in range(n_rows)
    ]
    table_with_dicts = pd.DataFrame({
        'dict_column': rows_of_dicts,
        'numerical': [1.2] * n_rows,
    })
    plain_table = pd.DataFrame({
        'numerical': [1.5] * 10,
        'categorical': ['A'] * 10,
    })
    tables = {'table_1': table_with_dicts, 'table_2': plain_table}
    # The message is escaped because ``match`` treats its argument as a regex.
    error_pattern = re.escape(
        "Unable to detect metadata for table 'table_1' column 'dict_column' due to an "
        "invalid data format.\n TypeError: unhashable type: 'dict'"
    )

    # Run / Assert
    with pytest.raises(InvalidMetadataError, match=error_pattern):
        Metadata.detect_from_dataframes(tables)
2 changes: 1 addition & 1 deletion tests/unit/metadata/test_multi_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -2499,7 +2499,7 @@ def test_detect_table_from_dataframe(self, single_table_mock, log_mock):
metadata.detect_table_from_dataframe('table', data)

# Assert
single_table_mock.return_value._detect_columns.assert_called_once_with(data)
single_table_mock.return_value._detect_columns.assert_called_once_with(data, 'table')
assert metadata.tables == {'table': single_table_mock.return_value}

expected_log_calls = call(
Expand Down
28 changes: 26 additions & 2 deletions tests/unit/metadata/test_single_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1277,9 +1277,8 @@ def test__detect_columns_with_error(self, mock__get_datetime_format):

expected_error_message = re.escape(
"Unsupported data type for column 'complex_dtype' (kind: c)."
"The valid data types are: 'object', 'int', 'float', 'datetime', 'bool'."
" The valid data types are: 'object', 'int', 'float', 'datetime', 'bool'."
)

with pytest.raises(InvalidMetadataError, match=expected_error_message):
instance._detect_columns(non_supported_data)

Expand All @@ -1296,6 +1295,31 @@ def test__detect_columns_with_error(self, mock__get_datetime_format):
instance._determine_sdtype_for_objects.assert_called_once()
mock__get_datetime_format.assert_called_once()

def test__detect_columns_invalid_data_format(self):
    """Test the ``_detect_columns`` method with an invalid data format.

    A column whose values are ``dict`` objects is expected to make
    ``_detect_columns`` raise an ``InvalidMetadataError`` whose message names the
    column and includes the underlying ``TypeError`` text.
    """
    # Setup
    instance = SingleTableMetadata()
    # Constant values keep the fixture deterministic; the assertion below only
    # matches on the error message, not on the contents of the dicts.
    dict_data = [
        {
            'key1': i,
            'key2': f'string_{i}',
            'key3': 1.5,
        }
        for i in range(100)
    ]
    data = pd.DataFrame({
        'dict_column': dict_data,
        'numerical': [1.2] * 100,
    })
    # The message is escaped because ``match`` treats its argument as a regex.
    expected_error_message = re.escape(
        "Unable to detect metadata for column 'dict_column' due to an invalid data format."
        "\n TypeError: unhashable type: 'dict'"
    )

    # Run / Assert
    with pytest.raises(InvalidMetadataError, match=expected_error_message):
        instance._detect_columns(data)

def test__detect_primary_key_missing_sdtypes(self):
"""The method should raise an error if not all sdtypes were detected."""
# Setup
Expand Down

0 comments on commit 5d7ba7c

Please sign in to comment.