Skip to content

Commit

Permalink
Remove method
Browse files Browse the repository at this point in the history
  • Loading branch information
fealho committed Aug 21, 2024
1 parent 224a9f4 commit 78efd86
Show file tree
Hide file tree
Showing 3 changed files with 1 addition and 142 deletions.
19 changes: 0 additions & 19 deletions sdv/metadata/multi_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,25 +560,6 @@ def detect_from_dataframes(self, data):

self._detect_relationships(data)

def detect_table_from_csv(self, table_name, filepath, read_csv_parameters=None):
    """Detect the metadata for a single table from a csv file.

    The table name is validated first, then the csv is loaded, its columns
    are detected into a new ``SingleTableMetadata`` and the result is stored
    under ``table_name`` and logged.

    Args:
        table_name (str):
            Name of the table to detect.
        filepath (str):
            String that represents the ``path`` to the ``csv`` file.
        read_csv_parameters (dict):
            A python dictionary with the keyword arguments accepted by the
            ``pandas.read_csv`` function. Defaults to ``None``.
    """
    # Validate before touching the file so nothing is loaded for a rejected name.
    self._validate_table_not_detected(table_name)

    table_metadata = SingleTableMetadata()
    table_metadata._detect_columns(_load_data_from_csv(filepath, read_csv_parameters))

    self.tables[table_name] = table_metadata
    self._log_detected_table(table_metadata)

def detect_from_csvs(self, folder_name, read_csv_parameters=None):
"""Detect the metadata for all tables in a folder of csv files.
Expand Down
50 changes: 0 additions & 50 deletions tests/integration/metadata/test_multi_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,56 +287,6 @@ def test_detect_from_csvs(tmp_path):
assert metadata.to_dict() == expected_metadata


def test_detect_table_from_csv(tmp_path):
    """Test that ``detect_table_from_csv`` detects a single table from a csv file."""
    # Setup
    real_data, _ = download_demo(modality='multi_table', dataset_name='fake_hotels')

    metadata = MultiTableMetadata()

    # Write every demo table to disk; only the ``hotels`` file is read below.
    for name, table in real_data.items():
        table.to_csv(tmp_path / f'{name}.csv', index=False)

    # Run
    metadata.detect_table_from_csv('hotels', tmp_path / 'hotels.csv')

    # Assert
    # These columns are explicitly set to categorical before comparing.
    for column in ('city', 'state', 'classification'):
        metadata.update_column(table_name='hotels', column_name=column, sdtype='categorical')

    hotel_columns = {
        'hotel_id': {'sdtype': 'id'},
        'city': {'sdtype': 'categorical'},
        'state': {'sdtype': 'categorical'},
        'rating': {'sdtype': 'numerical'},
        'classification': {'sdtype': 'categorical'},
    }
    expected_metadata = {
        'tables': {
            'hotels': {
                'columns': hotel_columns,
                'primary_key': 'hotel_id',
            }
        },
        'relationships': [],
        'METADATA_SPEC_VERSION': 'MULTI_TABLE_V1',
    }

    assert metadata.to_dict() == expected_metadata


def test_get_column_names():
"""Test the ``get_column_names`` method."""
# Setup
Expand Down
74 changes: 1 addition & 73 deletions tests/unit/metadata/test_multi_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -2310,82 +2310,10 @@ def test__detect_relationships_circular(self):
instance.add_relationship.assert_called_once_with('users', 'sessions', 'user_id', 'user_id')
assert instance.tables['sessions'].columns['user_id']['sdtype'] == 'categorical'

@patch('sdv.metadata.multi_table.LOGGER')
@patch('sdv.metadata.multi_table.SingleTableMetadata')
@patch('sdv.metadata.multi_table._load_data_from_csv')
def test_detect_table_from_csv(self, load_csv_mock, single_table_mock, log_mock):
    """Test the ``detect_table_from_csv`` method.

    If the table does not already exist, a ``SingleTableMetadata`` instance
    should be created and its ``_detect_columns`` method called with the
    data loaded from the csv.

    Setup:
        - Mock the csv loader, the ``SingleTableMetadata`` class and the logger.

    Assert:
        - The loader is called with the path and ``None`` parameters.
        - The table is added to ``self.tables``.
        - The detected metadata is logged.
    """
    # Setup
    metadata = MultiTableMetadata()
    loaded_data = Mock()
    load_csv_mock.return_value = loaded_data
    table_mock = single_table_mock.return_value
    table_mock.to_dict.return_value = {'columns': {'a': {'sdtype': 'numerical'}}}

    # Run
    metadata.detect_table_from_csv('table', 'path.csv')

    # Assert
    load_csv_mock.assert_called_once_with('path.csv', None)
    table_mock._detect_columns.assert_called_once_with(loaded_data)
    assert metadata.tables == {'table': table_mock}

    expected_message = (
        'Detected metadata:\n'
        '{\n'
        ' "columns": {\n'
        ' "a": {\n'
        ' "sdtype": "numerical"\n'
        ' }\n'
        ' }'
        '\n}'
    )
    log_mock.info.assert_has_calls([call(expected_message)])

def test_detect_table_from_csv_table_already_exists(self):
    """Test ``detect_table_from_csv`` when the table name is already taken.

    Setup:
        - Set the ``tables`` dict to already contain the table.

    Input:
        - Table name.
        - Path.

    Side effect:
        - An ``InvalidMetadataError`` should be raised.
    """
    # Setup
    instance = MultiTableMetadata()
    instance.tables = {'table': Mock()}
    expected_error = (
        "Metadata for table 'table' already exists. Specify a new table name or "
        'create a new MultiTableMetadata object for other data sources.'
    )

    # Run and Assert
    with pytest.raises(InvalidMetadataError, match=expected_error):
        instance.detect_table_from_csv('table', 'path.csv')

@patch('sdv.metadata.multi_table._load_data_from_csv')
def test_detect_from_csvs(self, load_data_mock, tmp_path):
"""Test the ``detect_from_csvs`` method.
The method should call ``detect_table_from_csv`` for each csv in the folder.
"""
"""Test the ``detect_from_csvs`` method."""
# Setup
instance = MultiTableMetadata()
instance.detect_table_from_dataframe = Mock()
Expand Down

0 comments on commit 78efd86

Please sign in to comment.