Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@ def build(self):
is_ap - Whether the domain is an AP domain
ap_suffix - The 2-character suffix from AP domains
"""
size_unit: str = self.rule_processor.get_size_unit_from_rule(self.rule)
return self.data_service.get_dataset_metadata(
dataset_name=self.dataset_metadata.name,
size_unit=size_unit,
)
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,8 @@ def build(self):
dataset_name=self.dataset_metadata.name
)
# Build dataset metadata dataframe
size_unit: str = self.rule_processor.get_size_unit_from_rule(self.rule)
dataset_metadata = self.data_service.get_dataset_metadata(
dataset_name=self.dataset_metadata.name,
size_unit=size_unit,
).to_dict(orient="records")[0]
# Build define xml dataframe
define = self.get_define_xml_item_group_metadata_for_dataset(dataset_metadata)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,8 @@ def build(self):
- is_ap - Whether the domain is an AP domain
- ap_suffix - The 2-character suffix from AP domains
"""
size_unit: str = self.rule_processor.get_size_unit_from_rule(self.rule)
dataset_metadata = self.data_service.get_dataset_metadata(
dataset_name=self.dataset_metadata.name,
size_unit=size_unit,
)
dataset_metadata = dataset_metadata.to_dict(orient="records")[0]
data_contents_long_df = super().build()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,9 +203,7 @@ def get_data_structure(
return OTHER

@cached_dataset(DatasetTypes.METADATA.value)
def get_dataset_metadata(
self, dataset_name: str, size_unit: str = None, **params
) -> DatasetInterface:
def get_dataset_metadata(self, dataset_name: str, **params) -> DatasetInterface:
"""
Gets metadata of a dataset and returns it as a DataFrame.
"""
Expand Down
13 changes: 2 additions & 11 deletions cdisc_rules_engine/services/data_services/local_data_service.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os
from os.path import basename
from io import IOBase
from typing import Iterable, List, Optional, Tuple
from typing import Iterable, List, Tuple

from cdisc_rules_engine.interfaces import CacheServiceInterface, ConfigInterface
from cdisc_rules_engine.models.sdtm_dataset_metadata import SDTMDatasetMetadata
Expand All @@ -22,9 +22,6 @@
DatasetNDJSONMetadataReader,
)
from cdisc_rules_engine.services.csv_metadata_reader import DatasetCSVMetadataReader
from cdisc_rules_engine.utilities.utils import (
convert_file_size,
)
from cdisc_rules_engine.exceptions.custom_exceptions import InvalidDatasetFormat
from .base_data_service import BaseDataService, cached_dataset
from cdisc_rules_engine.enums.dataformat_types import DataFormatTypes
Expand Down Expand Up @@ -249,16 +246,10 @@ def read_data(self, file_path: str) -> IOBase:

def __get_dataset_metadata(self, dataset_path: str, **kwargs) -> Tuple[dict, dict]:
"""
Internal method that gets dataset metadata
and converts file size if needed.
Internal method that gets dataset metadata.
"""
metadata: dict = self.__read_metadata(dataset_path)
file_metadata: dict = metadata["file_metadata"]
size_unit: Optional[str] = kwargs.get("size_unit")
if size_unit: # convert file size from bytes to desired unit if needed
file_metadata["file_size"] = convert_file_size(
file_metadata["file_size"], size_unit
)
return file_metadata, metadata["contents_metadata"]

def to_parquet(self, file_path: str) -> str:
Expand Down
10 changes: 0 additions & 10 deletions cdisc_rules_engine/utilities/rule_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -550,16 +550,6 @@ def is_relationship_dataset(self, dataset_name: str) -> bool:
)
return result

def get_size_unit_from_rule(self, rule: dict) -> Optional[str]:
"""
Extracts size unit from rule if it was passed
"""
rule_conditions: ConditionInterface = rule["conditions"]
for condition in rule_conditions.values():
value: dict = condition["value"]
if value["target"] == "dataset_size":
return value.get("unit")

def add_operator_to_rule_conditions(
self, rule: dict, target_to_operator_map: dict, domain: str
):
Expand Down
13 changes: 0 additions & 13 deletions cdisc_rules_engine/utilities/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,19 +38,6 @@ def convert_dataclass_to_superclass[T](instance: object, superclass: type[T]) ->
)


def convert_file_size(size_in_bytes: int, desired_unit: str) -> float:
"""
Converts file size from bytes to any of the following units:
KB, MB, GB
"""
unit_to_denominator_map: dict = {
"KB": 1024,
"MB": 1024**2,
"GB": 1024**3,
}
return size_in_bytes / unit_to_denominator_map[desired_unit]


def get_execution_status(results):
"""
If any result has an execution error, return execution error
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ def test_build_with_dataset_metadata(mock_build):

rule_mock = MagicMock()
rule_processor_mock = MagicMock()
rule_processor_mock.get_size_unit_from_rule.return_value = "KB"
try:
builder = ValueCheckDatasetMetadataDatasetBuilder(
rule=rule_mock,
Expand Down Expand Up @@ -133,7 +132,6 @@ def test_build_split_datasets(mock_build):

rule_mock = MagicMock()
rule_processor_mock = MagicMock()
rule_processor_mock.get_size_unit_from_rule.return_value = "KB"
try:
builder = ValueCheckDatasetMetadataDatasetBuilder(
rule=rule_mock,
Expand Down
77 changes: 0 additions & 77 deletions tests/unit/test_utilities/test_rule_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

import pandas as pd
import pytest
from conftest import mock_data_service
from cdisc_rules_engine.exceptions.custom_exceptions import DomainNotFoundError
from cdisc_rules_engine.models.sdtm_dataset_metadata import SDTMDatasetMetadata
from cdisc_rules_engine.models.rule_conditions import ConditionCompositeFactory
Expand Down Expand Up @@ -1065,82 +1064,6 @@ def test_extract_target_names_from_rule_output_variables():
]


@pytest.mark.parametrize(
"conditions",
[
{
"any": [
{
"value": {
"target": "dataset_label",
"comparator": "Adverse Events",
},
"operator": "equal_to",
},
{
"value": {"target": "dataset_size", "unit": "MB", "comparator": 5},
"operator": "less_than",
},
]
},
{
"any": [
{
"value": {
"target": "dataset_label",
"comparator": "Adverse Events",
},
"operator": "equal_to",
},
{
"all": [
{
"value": {
"target": "dataset_size",
"unit": "MB",
"comparator": 5,
},
"operator": "less_than",
},
]
},
]
},
{
"not": {
"any": [
{
"value": {
"target": "dataset_label",
"comparator": "Adverse Events",
},
"operator": "equal_to",
},
{
"all": [
{
"value": {
"target": "dataset_size",
"unit": "MB",
"comparator": 5,
},
"operator": "less_than",
},
]
},
]
}
},
],
)
def test_get_size_unit_from_rule(conditions: dict):
rule: dict = {
"conditions": ConditionCompositeFactory.get_condition_composite(conditions),
}
processor = RuleProcessor(mock_data_service, InMemoryCacheService())
assert processor.get_size_unit_from_rule(rule) == "MB"


def test_duplicate_for_targets():
"""
Unit test for ConditionComposite.add_variable_condtions method.
Expand Down
Loading