Please indicate the following details about the environment in which you found the bug:
METRICS = [
('BinaryDecisionTreeClassifier', {
'target': 'label',
}),
...
]
rs = sdgym.benchmark_single_table(
synthesizers=['TVAESynthesizer'],
show_progress=True,
sdv_datasets=['adult', 'census', 'intrusion'],
sdmetrics=METRICS,
)
0%| | 0/1 [00:00<?, ?it/s]Metric BinaryDecisionTreeClassifier failed on dataset adult. Skipping.
Traceback (most recent call last):
File "/usr/local/lib/python3.9/dist-packages/sdgym/benchmark.py", line 166, in _compute_scores
score = metric.compute(*metric_args, **metric_kwargs.get(metric_name, {}))
File "/usr/local/lib/python3.9/dist-packages/sdmetrics/single_table/efficacy/base.py", line 127, in compute
predictions = cls._fit_predict(train_data, train_target, test_data, test_target)
File "/usr/local/lib/python3.9/dist-packages/sdmetrics/single_table/efficacy/binary.py", line 37, in _fit_predict
return super()._fit_predict(train_data, train_target, test_data, test_target)
File "/usr/local/lib/python3.9/dist-packages/sdmetrics/single_table/efficacy/base.py", line 51, in _fit_predict
test_data = ht.transform(test_data)
File "/usr/local/lib/python3.9/dist-packages/sdmetrics/utils.py", line 200, in transform
out = transform_info['one_hot_encoder'].transform(col_data).toarray()
File "/usr/local/lib/python3.9/dist-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
X_int, X_mask = self._transform(
File "/usr/local/lib/python3.9/dist-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
raise ValueError(msg)
ValueError: Found unknown categories ['Never-worked'] in column 0 during transform
Metric BinaryAdaBoostClassifier failed on dataset adult. Skipping.
Traceback (most recent call last):
File "/usr/local/lib/python3.9/dist-packages/sdgym/benchmark.py", line 166, in _compute_scores
score = metric.compute(*metric_args, **metric_kwargs.get(metric_name, {}))
File "/usr/local/lib/python3.9/dist-packages/sdmetrics/single_table/efficacy/base.py", line 127, in compute
predictions = cls._fit_predict(train_data, train_target, test_data, test_target)
File "/usr/local/lib/python3.9/dist-packages/sdmetrics/single_table/efficacy/binary.py", line 37, in _fit_predict
return super()._fit_predict(train_data, train_target, test_data, test_target)
File "/usr/local/lib/python3.9/dist-packages/sdmetrics/single_table/efficacy/base.py", line 51, in _fit_predict
test_data = ht.transform(test_data)
File "/usr/local/lib/python3.9/dist-packages/sdmetrics/utils.py", line 200, in transform
out = transform_info['one_hot_encoder'].transform(col_data).toarray()
File "/usr/local/lib/python3.9/dist-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
X_int, X_mask = self._transform(
File "/usr/local/lib/python3.9/dist-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
raise ValueError(msg)
...
0%| | 0/1 [00:00<?, ?it/s]Metric BinaryDecisionTreeClassifier failed on dataset census. Skipping.
Traceback (most recent call last):
File "/usr/local/lib/python3.9/dist-packages/sdgym/benchmark.py", line 166, in _compute_scores
score = metric.compute(*metric_args, **metric_kwargs.get(metric_name, {}))
File "/usr/local/lib/python3.9/dist-packages/sdmetrics/single_table/efficacy/base.py", line 127, in compute
predictions = cls._fit_predict(train_data, train_target, test_data, test_target)
File "/usr/local/lib/python3.9/dist-packages/sdmetrics/single_table/efficacy/binary.py", line 37, in _fit_predict
return super()._fit_predict(train_data, train_target, test_data, test_target)
File "/usr/local/lib/python3.9/dist-packages/sdmetrics/single_table/efficacy/base.py", line 51, in _fit_predict
test_data = ht.transform(test_data)
File "/usr/local/lib/python3.9/dist-packages/sdmetrics/utils.py", line 200, in transform
out = transform_info['one_hot_encoder'].transform(col_data).toarray()
File "/usr/local/lib/python3.9/dist-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
X_int, X_mask = self._transform(
File "/usr/local/lib/python3.9/dist-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
raise ValueError(msg)
ValueError: Found unknown categories ['Job leaver'] in column 0 during transform
...
0%| | 0/1 [00:00<?, ?it/s]Metric BinaryDecisionTreeClassifier failed on dataset intrusion. Skipping.
Traceback (most recent call last):
File "/usr/local/lib/python3.9/dist-packages/sdgym/benchmark.py", line 166, in _compute_scores
score = metric.compute(*metric_args, **metric_kwargs.get(metric_name, {}))
File "/usr/local/lib/python3.9/dist-packages/sdmetrics/single_table/efficacy/base.py", line 127, in compute
predictions = cls._fit_predict(train_data, train_target, test_data, test_target)
File "/usr/local/lib/python3.9/dist-packages/sdmetrics/single_table/efficacy/binary.py", line 37, in _fit_predict
return super()._fit_predict(train_data, train_target, test_data, test_target)
File "/usr/local/lib/python3.9/dist-packages/sdmetrics/single_table/efficacy/base.py", line 51, in _fit_predict
test_data = ht.transform(test_data)
File "/usr/local/lib/python3.9/dist-packages/sdmetrics/utils.py", line 200, in transform
out = transform_info['one_hot_encoder'].transform(col_data).toarray()
File "/usr/local/lib/python3.9/dist-packages/sklearn/preprocessing/_encoders.py", line 882, in transform
X_int, X_mask = self._transform(
File "/usr/local/lib/python3.9/dist-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform
raise ValueError(msg)
ValueError: Found unknown categories ['supdup', 'ftp', 'mtp', 'gopher', 'hostnames', 'rje', 'whois', 'vmnet', 'systat', 'link', 'iso_tsap', 'exec', 'bgp', 'echo', 'ldap', 'ctf', 'netstat', 'name', 'tim_i', 'courier', 'kshell', 'netbios_ssn', 'uucp', 'remote_job', 'uucp_path', 'urh_i', 'daytime', 'sunrpc', 'red_i', 'klogin', 'login', 'pop_2', 'csnet_ns', 'http_443', 'nnsp', 'tftp_u', 'auth', 'shell', 'Z39_50', 'pm_dump', 'netbios_ns', 'imap4', 'time', 'discard', 'ssh', 'pop_3', 'netbios_dgm', 'domain', 'nntp', 'sql_net'] in column 0 during transform
Just running the above snippet produces the output.
Environment Details
Please indicate the following details about the environment in which you found the bug:
Error Description
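I am trying to run the `sdgym.benchmark_single_table` snippet shown at the top of this report.
This produces the errors shown in the tracebacks above: the metrics fail on each of the three datasets (`adult`, `census`, `intrusion`) with `ValueError: Found unknown categories [...] in column 0 during transform`.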
Steps to reproduce
Running the snippet above as-is reproduces the output; no additional steps are needed.