Skip to content

Commit 711ad04

Browse files
Added test cases for high-level functions
1 parent eea6153 commit 711ad04

File tree

2 files changed

+96
-2
lines changed

2 files changed

+96
-2
lines changed

src/data_tools/analysis/models.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ def profile(self) -> None:
119119
Profiles the dataset including table and columns and stores the result in the 'results' dictionary.
120120
This is a convenience method to run profiling on the raw dataframe.
121121
"""
122-
if not self.raw_df:
122+
if self.raw_df.empty:
123123
raise ValueError("The raw dataframe is empty. Cannot perform profiling.")
124124
self.profile_table().profile_columns()
125125
return self
@@ -129,7 +129,7 @@ def identify_datatypes(self) -> None:
129129
Identifies the data types for the dataset and stores the result in the 'results' dictionary.
130130
This is a convenience method to run data type identification on the raw dataframe.
131131
"""
132-
if not self.raw_df:
132+
if self.raw_df.empty:
133133
raise ValueError("The raw dataframe is empty. Cannot perform data type identification.")
134134
self.identify_datatypes_l1().identify_datatypes_l2()
135135
return self

tests/analysis/test_high_level.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
import pandas as pd
2+
import pytest
3+
4+
from data_tools.analysis.models import DataSet
5+
6+
7+
@pytest.fixture
def sample_dataframe():
    """Build the small purchase table shared by the tests in this module.

    Returns:
        A five-row ``pandas.DataFrame`` with the columns ``user_id``,
        ``product_name``, ``price`` and ``purchase_date`` (the last one
        parsed from ISO date strings with ``pd.to_datetime``).
    """
    purchase_dates = pd.to_datetime(
        ["2023-01-15", "2023-01-16", "2023-01-17", "2023-01-18", "2023-01-19"]
    )
    # Insertion order of this dict fixes the column order of the frame.
    columns = {
        "user_id": [1, 2, 3, 4, 5],
        "product_name": ["Laptop", "Mouse", "Keyboard", "Monitor", "Webcam"],
        "price": [1200.50, 25.00, 75.99, 300.00, 55.50],
        "purchase_date": purchase_dates,
    }
    return pd.DataFrame(columns)
18+
19+
20+
def test_profile(sample_dataframe):
    """Verify that ``profile()`` stores a table profile and column profiles.

    The table profile must report five rows and exactly the four fixture
    columns; the column profiles must contain one entry per column.
    """
    expected_columns = {"user_id", "product_name", "price", "purchase_date"}

    ds = DataSet(df=sample_dataframe, name="test_table")
    ds.profile()
    results = ds.results

    # Table-level profile.
    assert "table_profile" in results
    table_info = results["table_profile"]
    assert table_info is not None
    assert table_info.count == 5
    assert set(table_info.columns) == expected_columns

    # Column-level profiles.
    assert "column_profiles" in results
    per_column = results["column_profiles"]
    assert per_column is not None
    assert len(per_column) == 4
35+
36+
37+
def test_identify_datatypes(sample_dataframe):
    """Verify that ``identify_datatypes()`` stores both l1 and l2 results.

    Each of the two result entries must be present, non-``None`` and hold
    one item per fixture column (four in total).
    """
    ds = DataSet(df=sample_dataframe, name="test_table")
    ds.profile()
    ds.identify_datatypes()
    results = ds.results

    # Both levels are checked with the same three assertions, l1 first.
    for result_key in ("column_datatypes_l1", "column_datatypes_l2"):
        assert result_key in results
        datatypes = results[result_key]
        assert datatypes is not None
        assert len(datatypes) == 4
52+
53+
54+
def test_identify_keys(sample_dataframe):
    """Verify that ``identify_keys()`` stores a non-``None`` ``"key"`` result.

    Profiling and datatype identification are run first, in that order,
    before key identification is invoked.
    """
    ds = DataSet(df=sample_dataframe, name="test_table")
    ds.profile()
    ds.identify_datatypes()
    ds.identify_keys()
    results = ds.results

    assert "key" in results
    detected_key = results["key"]
    assert detected_key is not None
64+
65+
66+
def test_generate_glossary(sample_dataframe):
    """Verify that ``generate_glossary()`` stores both glossary results.

    After profiling and datatype identification, the glossary is generated
    for the ``"ecommerce"`` domain; the column-level and table-level
    entries must then be present and non-``None``.
    """
    ds = DataSet(df=sample_dataframe, name="test_table")
    ds.profile()
    ds.identify_datatypes()
    ds.generate_glossary(domain="ecommerce")
    results = ds.results

    # Same presence / not-None checks for each entry, in the original order.
    for result_key in ("business_glossary_and_tags", "table_glossary"):
        assert result_key in results
        entry = results[result_key]
        assert entry is not None
79+
80+
81+
def test_save_yaml(sample_dataframe, tmp_path):
    """Verify that ``save_yaml()`` writes a file containing ``"sources"``.

    The dataset is profiled, typed and given a glossary before being saved
    to ``test_table.yml`` inside pytest's ``tmp_path`` directory.
    """
    ds = DataSet(df=sample_dataframe, name="test_table")
    ds.profile()
    ds.identify_datatypes()
    ds.generate_glossary(domain="ecommerce")

    target = tmp_path / "test_table.yml"
    # save_yaml is handed a plain string path, not the Path object.
    ds.save_yaml(file_path=str(target))

    assert target.exists()
    # Text mode with the platform default encoding, as open(path, "r") uses.
    written = target.read_text()
    assert "sources" in written

0 commit comments

Comments
 (0)