diff --git a/README.md b/README.md index c7ff8da0..b30b1783 100644 --- a/README.md +++ b/README.md @@ -1104,6 +1104,7 @@ Available export options: | `html` | Export to HTML | ✅ | | `jsonschema` | Export to JSON Schema | ✅ | | `odcs` | Export to Open Data Contract Standard (ODCS) V3 | ✅ | +| `osi` | Export to Open Semantic Interchange (OSI) format | ✅ | | `sodacl` | Export to SodaCL quality checks in YAML format | ✅ | | `dbt` | Export to dbt models in YAML format | ✅ | | `dbt-sources` | Export to dbt sources in YAML format | ✅ | @@ -1530,6 +1531,7 @@ Available import options: | `glue` | Import from AWS Glue DataCatalog | ✅ | | `iceberg` | Import from an Iceberg JSON Schema Definition | partial | | `jsonschema` | Import from JSON Schemas | ✅ | +| `osi` | Import from Open Semantic Interchange (OSI) | ✅ | | `parquet` | Import from Parquet File Metadata | ✅ | | `protobuf` | Import from Protobuf schemas | ✅ | | `spark` | Import from Spark StructTypes, Variant | ✅ | @@ -1628,6 +1630,22 @@ datacontract import --format glue --source --glue-table ``` +#### OSI (Open Semantic Interchange) + +Importing from [Open Semantic Interchange (OSI)](https://github.com/open-semantic-interchange/OSI) semantic models. OSI is a vendor-neutral standard for semantic model exchange across BI, AI, and data analytics tools. + +Examples: + +```bash +# Example import from OSI semantic model +datacontract import --format osi --source semantic_model.yaml +``` + +```bash +# Example export to OSI semantic model +datacontract export --format osi datacontract.yaml +``` + #### Spark Importing from Spark table or view these must be created or accessible in the Spark context. Specify tables list in `source` parameter. If the `source` tables are registered as tables in Databricks, and they have a table-level descriptions they will also be added to the Data Contract Specification. 
diff --git a/datacontract/export/exporter.py b/datacontract/export/exporter.py index c87e6592..e54db10b 100644 --- a/datacontract/export/exporter.py +++ b/datacontract/export/exporter.py @@ -40,6 +40,7 @@ class ExportFormat(str, Enum): dbt_sources = "dbt-sources" dbt_staging_sql = "dbt-staging-sql" odcs = "odcs" + osi = "osi" rdf = "rdf" avro = "avro" protobuf = "protobuf" diff --git a/datacontract/export/exporter_factory.py b/datacontract/export/exporter_factory.py index 76a8cf97..72ba132a 100644 --- a/datacontract/export/exporter_factory.py +++ b/datacontract/export/exporter_factory.py @@ -208,3 +208,8 @@ def load_module_class(module_path, class_name): exporter_factory.register_lazy_exporter( name=ExportFormat.excel, module_path="datacontract.export.excel_exporter", class_name="ExcelExporter" ) +exporter_factory.register_lazy_exporter( + name=ExportFormat.osi, + module_path="datacontract.export.osi_exporter", + class_name="OsiExporter", +) diff --git a/datacontract/export/osi_exporter.py b/datacontract/export/osi_exporter.py new file mode 100644 index 00000000..0b15739a --- /dev/null +++ b/datacontract/export/osi_exporter.py @@ -0,0 +1,164 @@ +"""Exporter for Open Semantic Interchange (OSI) format.""" + +from typing import Any, Dict, List, Optional + +import yaml +from open_data_contract_standard.model import OpenDataContractStandard, SchemaObject, SchemaProperty + +from datacontract.export.exporter import Exporter + + +class OsiExporter(Exporter): + def export( + self, + data_contract: OpenDataContractStandard, + schema_name: str, + server: str, + sql_server_type: str, + export_args: dict, + ) -> str: + return export_osi(data_contract) + + +def export_osi(data_contract: OpenDataContractStandard) -> str: + """Export ODCS data contract to OSI semantic model format.""" + semantic_model = convert_odcs_to_osi(data_contract) + return yaml.dump({"semantic_model": semantic_model}, default_flow_style=False, sort_keys=False, allow_unicode=True) + + +def 
convert_odcs_to_osi(data_contract: OpenDataContractStandard) -> Dict[str, Any]: + """Convert ODCS data contract to OSI semantic model.""" + model = { + "name": data_contract.id or data_contract.name or "unnamed_model", + } + + # Add description + if data_contract.description: + if hasattr(data_contract.description, "purpose") and data_contract.description.purpose: + model["description"] = data_contract.description.purpose + elif isinstance(data_contract.description, str): + model["description"] = data_contract.description + + # Convert schemas to datasets and collect relationships + datasets = [] + relationships = [] + + if data_contract.schema_: + for schema in data_contract.schema_: + dataset = convert_schema_to_dataset(schema) + datasets.append(dataset) + + # Extract relationships from property references + schema_relationships = extract_relationships_from_schema(schema) + relationships.extend(schema_relationships) + + model["datasets"] = datasets + + if relationships: + model["relationships"] = relationships + + return model + + +def convert_schema_to_dataset(schema: SchemaObject) -> Dict[str, Any]: + """Convert ODCS SchemaObject to OSI dataset.""" + dataset = { + "name": schema.name, + "source": schema.physicalName or schema.name, + } + + # Extract primary key columns + primary_key = [] + unique_columns = [] + + if schema.properties: + for prop in schema.properties: + if prop.primaryKey: + primary_key.append((prop.primaryKeyPosition or 999, prop.name)) + if prop.unique and not prop.primaryKey: + unique_columns.append(prop.name) + + # Sort primary key by position + primary_key.sort(key=lambda x: x[0]) + if primary_key: + dataset["primary_key"] = [pk[1] for pk in primary_key] + + # Add unique keys (each as single-column key) + if unique_columns: + dataset["unique_keys"] = [[col] for col in unique_columns] + + # Add description + if schema.description: + dataset["description"] = schema.description + + # Convert properties to fields + if 
schema.properties: + fields = [convert_property_to_field(prop) for prop in schema.properties] + dataset["fields"] = fields + + return dataset + + +def convert_property_to_field(prop: SchemaProperty) -> Dict[str, Any]: + """Convert ODCS SchemaProperty to OSI field.""" + field = { + "name": prop.name, + "expression": { + "dialects": [ + { + "dialect": "ANSI_SQL", + "expression": prop.physicalName or prop.name, + } + ] + }, + } + + # Add other dialects from custom properties + if prop.customProperties: + for cp in prop.customProperties: + if cp.property == "osi_dialects" and cp.value: + field["expression"]["dialects"].extend(cp.value) + + # Add dimension for time types + if prop.logicalType in ["date", "timestamp", "datetime"]: + field["dimension"] = {"is_time": True} + + # Add description + if prop.description: + field["description"] = prop.description + + # Add label from businessName + if prop.businessName: + field["label"] = prop.businessName + + return field + + +def extract_relationships_from_schema(schema: SchemaObject) -> List[Dict[str, Any]]: + """Extract foreign key relationships from schema properties.""" + relationships = [] + + if not schema.properties: + return relationships + + for prop in schema.properties: + if prop.relationships: + for rel_obj in prop.relationships: + # Parse reference from 'to' field: "target_table.target_column" + if rel_obj.to: + parts = rel_obj.to.split(".") + if len(parts) >= 2: + to_table = parts[0] + to_column = parts[1] + + rel = { + "name": f"{schema.name}_{prop.name}_to_{to_table}", + "from": schema.name, + "to": to_table, + "from_columns": [prop.name], + "to_columns": [to_column], + } + relationships.append(rel) + + return relationships diff --git a/datacontract/imports/importer.py b/datacontract/imports/importer.py index 24961fb8..26bc3252 100644 --- a/datacontract/imports/importer.py +++ b/datacontract/imports/importer.py @@ -31,6 +31,7 @@ class ImportFormat(str, Enum): json = "json" bigquery = "bigquery" odcs = "odcs" + 
osi = "osi" unity = "unity" spark = "spark" iceberg = "iceberg" diff --git a/datacontract/imports/importer_factory.py b/datacontract/imports/importer_factory.py index 6566f18b..e1ef116c 100644 --- a/datacontract/imports/importer_factory.py +++ b/datacontract/imports/importer_factory.py @@ -126,3 +126,8 @@ def load_module_class(module_path, class_name): module_path="datacontract.imports.json_importer", class_name="JsonImporter", ) +importer_factory.register_lazy_importer( + name=ImportFormat.osi, + module_path="datacontract.imports.osi_importer", + class_name="OsiImporter", +) diff --git a/datacontract/imports/osi_importer.py b/datacontract/imports/osi_importer.py new file mode 100644 index 00000000..33dc574f --- /dev/null +++ b/datacontract/imports/osi_importer.py @@ -0,0 +1,228 @@ +"""Importer for Open Semantic Interchange (OSI) format.""" + +from typing import Any, Dict, List, Optional + +import yaml +from open_data_contract_standard.model import CustomProperty, OpenDataContractStandard, Relationship, SchemaProperty + +from datacontract.imports.importer import Importer +from datacontract.imports.odcs_helper import create_odcs, create_property, create_schema_object +from datacontract.model.exceptions import DataContractException + + +class OsiImporter(Importer): + def import_source(self, source: str, import_args: dict) -> OpenDataContractStandard: + return import_osi(source) + + +def import_osi(source: str) -> OpenDataContractStandard: + """Import an OSI semantic model and create an ODCS data contract.""" + try: + with open(source, "r") as f: + osi_data = yaml.safe_load(f) + except Exception as e: + raise DataContractException( + type="schema", + name="Parse OSI", + reason=f"Failed to parse OSI file from {source}", + engine="datacontract", + original_exception=e, + ) + + if "semantic_model" not in osi_data: + raise DataContractException( + type="schema", + name="Parse OSI", + reason="Invalid OSI format: missing 'semantic_model' root element", + 
engine="datacontract", + ) + + semantic_model = osi_data["semantic_model"] + return convert_osi_to_odcs(semantic_model) + + +def convert_osi_to_odcs(semantic_model: Dict[str, Any]) -> OpenDataContractStandard: + """Convert OSI semantic model to ODCS format.""" + name = semantic_model.get("name", "unnamed_model") + description = semantic_model.get("description") + ai_context = semantic_model.get("ai_context") + + # Combine description and ai_context + full_description = description + if ai_context: + if full_description: + full_description = f"{full_description}\n\nAI Context: {ai_context}" + else: + full_description = f"AI Context: {ai_context}" + + odcs = create_odcs(name=name) + + # Build relationship lookup: from_dataset.from_column -> to_dataset.to_column + relationships = semantic_model.get("relationships", []) + relationship_map = build_relationship_map(relationships) + + # Convert datasets to schemas + datasets = semantic_model.get("datasets", []) + schemas = [] + for dataset in datasets: + schema = convert_dataset_to_schema(dataset, relationship_map) + schemas.append(schema) + + odcs.schema_ = schemas + + # Store metrics in custom properties if present + metrics = semantic_model.get("metrics", []) + if metrics: + odcs.customProperties = [ + CustomProperty(property="osi_metrics", value=metrics) + ] + + return odcs + + +def build_relationship_map(relationships: List[Dict[str, Any]]) -> Dict[str, str]: + """Build a map of from_dataset.column -> to_dataset.column for FK references.""" + rel_map = {} + for rel in relationships: + from_dataset = rel.get("from") + to_dataset = rel.get("to") + from_columns = rel.get("from_columns", []) + to_columns = rel.get("to_columns", []) + + for from_col, to_col in zip(from_columns, to_columns): + key = f"{from_dataset}.{from_col}" + value = f"{to_dataset}.{to_col}" + rel_map[key] = value + + return rel_map + + +def convert_dataset_to_schema(dataset: Dict[str, Any], relationship_map: Dict[str, str]): + """Convert an OSI 
dataset to an ODCS SchemaObject.""" + name = dataset.get("name") + source = dataset.get("source") + description = dataset.get("description") + ai_context = dataset.get("ai_context") + primary_key = dataset.get("primary_key", []) + unique_keys = dataset.get("unique_keys", []) + fields = dataset.get("fields", []) + + # Combine description and ai_context + full_description = description + if ai_context: + if full_description: + full_description = f"{full_description}\n\nAI Context: {ai_context}" + else: + full_description = f"AI Context: {ai_context}" + + # Flatten unique_keys to a set of column names + unique_columns = set() + for uk in unique_keys: + if isinstance(uk, list): + for col in uk: + unique_columns.add(col) + else: + unique_columns.add(uk) + + # Convert fields to properties + properties = [] + for idx, field in enumerate(fields): + prop = convert_field_to_property( + field=field, + dataset_name=name, + primary_key=primary_key, + unique_columns=unique_columns, + relationship_map=relationship_map, + ) + properties.append(prop) + + schema = create_schema_object( + name=name, + physical_type="table", + description=full_description, + properties=properties, + ) + schema.physicalName = source + + return schema + + +def convert_field_to_property( + field: Dict[str, Any], + dataset_name: str, + primary_key: List[str], + unique_columns: set, + relationship_map: Dict[str, str], +) -> SchemaProperty: + """Convert an OSI field to an ODCS SchemaProperty.""" + name = field.get("name") + label = field.get("label") + description = field.get("description") + ai_context = field.get("ai_context") + expression = field.get("expression", {}) + dimension = field.get("dimension", {}) + + # Combine description and ai_context + full_description = description + if ai_context: + if full_description: + full_description = f"{full_description}\n\nAI Context: {ai_context}" + else: + full_description = f"AI Context: {ai_context}" + + # Determine if this is a time dimension + is_time = 
dimension.get("is_time", False) if dimension else False + + # Infer logical type from dimension or default to string + logical_type = "string" + if is_time: + logical_type = "timestamp" + + # Check if primary key + is_primary_key = name in primary_key + pk_position = primary_key.index(name) + 1 if is_primary_key else None + + # Check if unique + is_unique = name in unique_columns + + # Check for foreign key reference + fk_key = f"{dataset_name}.{name}" + reference = relationship_map.get(fk_key) + + # Get expression for storage + dialects = expression.get("dialects", []) + expr_value = None + if dialects: + # Prefer ANSI_SQL, fallback to first dialect + for d in dialects: + if d.get("dialect") == "ANSI_SQL": + expr_value = d.get("expression") + break + if not expr_value and dialects: + expr_value = dialects[0].get("expression") + + # Store non-ANSI dialects in custom properties + custom_props = {} + other_dialects = [d for d in dialects if d.get("dialect") != "ANSI_SQL"] + if other_dialects: + custom_props["osi_dialects"] = other_dialects + + prop = create_property( + name=name, + logical_type=logical_type, + description=full_description, + primary_key=is_primary_key, + primary_key_position=pk_position, + unique=is_unique if is_unique else None, + custom_properties=custom_props if custom_props else None, + ) + + # Set business name from label + if label: + prop.businessName = label + + # Set relationship for foreign keys + if reference: + prop.relationships = [Relationship(type="foreignKey", to=reference)] + + return prop diff --git a/tests/fixtures/osi/export/expected_osi.yaml b/tests/fixtures/osi/export/expected_osi.yaml new file mode 100644 index 00000000..a1d412b6 --- /dev/null +++ b/tests/fixtures/osi/export/expected_osi.yaml @@ -0,0 +1,84 @@ +semantic_model: + name: orders-osi-test + description: Test data contract for OSI export + datasets: + - name: orders + source: sales.orders + primary_key: + - order_id + description: Order transactions + fields: + - 
name: order_id + expression: + dialects: + - dialect: ANSI_SQL + expression: order_id + description: Unique order identifier + - name: customer_id + expression: + dialects: + - dialect: ANSI_SQL + expression: customer_id + description: Reference to customer + - name: order_date + expression: + dialects: + - dialect: ANSI_SQL + expression: order_date + dimension: + is_time: true + description: Date order was placed + - name: total_amount + expression: + dialects: + - dialect: ANSI_SQL + expression: total_amount + description: Total order value + - name: status + expression: + dialects: + - dialect: ANSI_SQL + expression: status + description: Order status + - name: customers + source: sales.customers + primary_key: + - customer_id + unique_keys: + - - email + description: Customer dimension + fields: + - name: customer_id + expression: + dialects: + - dialect: ANSI_SQL + expression: customer_id + description: Unique customer identifier + - name: customer_name + expression: + dialects: + - dialect: ANSI_SQL + expression: customer_name + description: Full customer name + - name: email + expression: + dialects: + - dialect: ANSI_SQL + expression: email + description: Customer email address + - name: created_at + expression: + dialects: + - dialect: ANSI_SQL + expression: created_at + dimension: + is_time: true + description: Account creation timestamp + relationships: + - name: orders_customer_id_to_customers + from: orders + to: customers + from_columns: + - customer_id + to_columns: + - customer_id diff --git a/tests/fixtures/osi/export/odcs.yaml b/tests/fixtures/osi/export/odcs.yaml new file mode 100644 index 00000000..86c6b477 --- /dev/null +++ b/tests/fixtures/osi/export/odcs.yaml @@ -0,0 +1,71 @@ +kind: DataContract +apiVersion: v3.1.0 +id: orders-osi-test +name: Orders OSI Test +version: 1.0.0 +status: active +description: + purpose: Test data contract for OSI export +schema: +- name: orders + physicalType: table + physicalName: sales.orders + description: Order 
transactions + properties: + - name: order_id + physicalType: varchar + description: Unique order identifier + primaryKey: true + logicalType: string + required: true + - name: customer_id + physicalType: integer + description: Reference to customer + logicalType: integer + required: true + relationships: + - type: foreignKey + to: customers.customer_id + - name: order_date + physicalType: date + description: Date order was placed + logicalType: date + required: true + - name: total_amount + physicalType: decimal + description: Total order value + logicalType: number + required: false + - name: status + physicalType: varchar + description: Order status + logicalType: string + required: true +- name: customers + physicalType: table + physicalName: sales.customers + description: Customer dimension + properties: + - name: customer_id + physicalType: integer + description: Unique customer identifier + primaryKey: true + logicalType: integer + required: true + unique: true + - name: customer_name + physicalType: varchar + description: Full customer name + logicalType: string + required: true + - name: email + physicalType: varchar + description: Customer email address + logicalType: string + required: true + unique: true + - name: created_at + physicalType: timestamp + description: Account creation timestamp + logicalType: timestamp + required: false diff --git a/tests/fixtures/osi/import/expected_odcs.yaml b/tests/fixtures/osi/import/expected_odcs.yaml new file mode 100644 index 00000000..70f59b4e --- /dev/null +++ b/tests/fixtures/osi/import/expected_odcs.yaml @@ -0,0 +1,78 @@ +version: 1.0.0 +kind: DataContract +apiVersion: v3.1.0 +id: my-data-contract +name: orders_model +status: draft +schema: +- name: orders + physicalType: table + description: 'Order transactions + + + AI Context: Main fact table for orders' + logicalType: object + physicalName: sales.orders + properties: + - name: order_id + description: Unique order identifier + businessName: Order ID + 
primaryKey: true + primaryKeyPosition: 1 + logicalType: string + - name: customer_id + description: Reference to customer + logicalType: string + relationships: + - type: foreignKey + to: customers.customer_id + - name: order_date + description: Date order was placed + logicalType: timestamp + - name: total_amount + description: Total order value + logicalType: string + - name: status + description: Order status + logicalType: string +- name: customers + physicalType: table + description: Customer dimension + logicalType: object + physicalName: sales.customers + properties: + - name: customer_id + description: Unique customer identifier + primaryKey: true + primaryKeyPosition: 1 + logicalType: string + - name: customer_name + description: Full customer name + customProperties: + - property: osi_dialects + value: + - dialect: SNOWFLAKE + expression: CONCAT(first_name, ' ', last_name) + logicalType: string + - name: email + description: Customer email address + logicalType: string + unique: true + - name: created_at + description: Account creation timestamp + logicalType: timestamp +customProperties: +- property: osi_metrics + value: + - name: total_revenue + expression: + dialects: + - dialect: ANSI_SQL + expression: SUM(total_amount) + description: Total revenue from all orders + - name: order_count + expression: + dialects: + - dialect: ANSI_SQL + expression: COUNT(DISTINCT order_id) + description: Number of unique orders diff --git a/tests/fixtures/osi/import/orders_semantic_model.yaml b/tests/fixtures/osi/import/orders_semantic_model.yaml new file mode 100644 index 00000000..e7587d4a --- /dev/null +++ b/tests/fixtures/osi/import/orders_semantic_model.yaml @@ -0,0 +1,105 @@ +semantic_model: + name: orders_model + description: Order management semantic model + ai_context: Use this model for sales and order analytics + datasets: + - name: orders + source: sales.orders + primary_key: + - order_id + description: Order transactions + ai_context: Main fact table for 
orders + fields: + - name: order_id + expression: + dialects: + - dialect: ANSI_SQL + expression: order_id + label: Order ID + description: Unique order identifier + - name: customer_id + expression: + dialects: + - dialect: ANSI_SQL + expression: customer_id + description: Reference to customer + - name: order_date + expression: + dialects: + - dialect: ANSI_SQL + expression: order_date + dimension: + is_time: true + description: Date order was placed + - name: total_amount + expression: + dialects: + - dialect: ANSI_SQL + expression: total_amount + description: Total order value + - name: status + expression: + dialects: + - dialect: ANSI_SQL + expression: status + description: Order status + - name: customers + source: sales.customers + primary_key: + - customer_id + unique_keys: + - - email + description: Customer dimension + fields: + - name: customer_id + expression: + dialects: + - dialect: ANSI_SQL + expression: customer_id + description: Unique customer identifier + - name: customer_name + expression: + dialects: + - dialect: ANSI_SQL + expression: "first_name || ' ' || last_name" + - dialect: SNOWFLAKE + expression: CONCAT(first_name, ' ', last_name) + description: Full customer name + - name: email + expression: + dialects: + - dialect: ANSI_SQL + expression: email + description: Customer email address + - name: created_at + expression: + dialects: + - dialect: ANSI_SQL + expression: created_at + dimension: + is_time: true + description: Account creation timestamp + relationships: + - name: orders_to_customers + from: orders + to: customers + from_columns: + - customer_id + to_columns: + - customer_id + metrics: + - name: total_revenue + expression: + dialects: + - dialect: ANSI_SQL + expression: SUM(total_amount) + description: Total revenue from all orders + - name: order_count + expression: + dialects: + - dialect: ANSI_SQL + expression: COUNT(DISTINCT order_id) + description: Number of unique orders + custom_extensions: + - vendor_name: DBT + data: 
'{"project": "sales_analytics"}' diff --git a/tests/fixtures/osi/import/simple_model.yaml b/tests/fixtures/osi/import/simple_model.yaml new file mode 100644 index 00000000..e89c68f9 --- /dev/null +++ b/tests/fixtures/osi/import/simple_model.yaml @@ -0,0 +1,27 @@ +semantic_model: + name: simple_model + description: Simple semantic model for basic testing + datasets: + - name: users + source: public.users + primary_key: + - id + fields: + - name: id + expression: + dialects: + - dialect: ANSI_SQL + expression: id + description: User ID + - name: name + expression: + dialects: + - dialect: ANSI_SQL + expression: name + description: User name + - name: active + expression: + dialects: + - dialect: ANSI_SQL + expression: active + description: Is user active diff --git a/tests/test_export_osi.py b/tests/test_export_osi.py new file mode 100644 index 00000000..e8f6ac4e --- /dev/null +++ b/tests/test_export_osi.py @@ -0,0 +1,94 @@ +import yaml +from typer.testing import CliRunner + +from datacontract.cli import app +from datacontract.data_contract import DataContract + + +def test_cli(): + runner = CliRunner() + result = runner.invoke( + app, + ["export", "./fixtures/osi/export/odcs.yaml", "--format", "osi"], + ) + assert result.exit_code == 0 + + +def test_export_osi_semantic_model(): + data_contract = DataContract(data_contract_file="fixtures/osi/export/odcs.yaml") + result = data_contract.export("osi") + + with open("fixtures/osi/export/expected_osi.yaml") as f: + expected = f.read() + + print("Result:\n", result) + assert yaml.safe_load(result) == yaml.safe_load(expected) + + +def test_export_osi_relationships(): + """Test that references are exported as relationships.""" + data_contract = DataContract(data_contract_file="fixtures/osi/export/odcs.yaml") + result = data_contract.export("osi") + + osi_model = yaml.safe_load(result) + model = osi_model["semantic_model"] + + # Verify relationships exist + assert "relationships" in model + assert len(model["relationships"]) 
>= 1 + + # Verify orders->customers relationship + rel = next( + (r for r in model["relationships"] if r["from"] == "orders" and r["to"] == "customers"), + None, + ) + assert rel is not None + assert rel["from_columns"] == ["customer_id"] + assert rel["to_columns"] == ["customer_id"] + + +def test_export_osi_time_dimensions(): + """Test that date/timestamp fields get dimension.is_time=true.""" + data_contract = DataContract(data_contract_file="fixtures/osi/export/odcs.yaml") + result = data_contract.export("osi") + + osi_model = yaml.safe_load(result) + orders = next(d for d in osi_model["semantic_model"]["datasets"] if d["name"] == "orders") + + order_date_field = next(f for f in orders["fields"] if f["name"] == "order_date") + assert "dimension" in order_date_field + assert order_date_field["dimension"]["is_time"] is True + + +def test_export_osi_expressions(): + """Test that fields have proper ANSI_SQL expressions.""" + data_contract = DataContract(data_contract_file="fixtures/osi/export/odcs.yaml") + result = data_contract.export("osi") + + osi_model = yaml.safe_load(result) + orders = next(d for d in osi_model["semantic_model"]["datasets"] if d["name"] == "orders") + + for field in orders["fields"]: + assert "expression" in field + assert "dialects" in field["expression"] + dialect = field["expression"]["dialects"][0] + assert dialect["dialect"] == "ANSI_SQL" + assert "expression" in dialect + + +def test_roundtrip_osi(): + """Test that import -> export produces valid OSI.""" + # Import OSI + imported_odcs = DataContract.import_from_source("osi", "fixtures/osi/import/orders_semantic_model.yaml") + + # Wrap in DataContract to export + dc = DataContract(data_contract=imported_odcs) + exported = dc.export("osi") + osi_model = yaml.safe_load(exported) + + # Verify structure is valid + assert "semantic_model" in osi_model + model = osi_model["semantic_model"] + assert "name" in model + assert "datasets" in model + assert len(model["datasets"]) == 2 diff --git 
a/tests/test_import_osi.py b/tests/test_import_osi.py new file mode 100644 index 00000000..2972d43e --- /dev/null +++ b/tests/test_import_osi.py @@ -0,0 +1,68 @@ +import yaml +from typer.testing import CliRunner + +from datacontract.cli import app +from datacontract.data_contract import DataContract + + +def test_cli(): + runner = CliRunner() + result = runner.invoke( + app, + [ + "import", + "--format", + "osi", + "--source", + "fixtures/osi/import/orders_semantic_model.yaml", + ], + ) + assert result.exit_code == 0 + + +def test_import_osi_semantic_model(): + result = DataContract.import_from_source("osi", "fixtures/osi/import/orders_semantic_model.yaml") + + with open("fixtures/osi/import/expected_odcs.yaml") as f: + expected = f.read() + + print("Result:\n", result.to_yaml()) + assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected) + + +def test_import_osi_with_metrics(): + """Test that metrics are preserved in custom properties.""" + result = DataContract.import_from_source("osi", "fixtures/osi/import/orders_semantic_model.yaml") + + # Metrics should be stored in customProperties at contract level or as special schema + yaml_output = result.to_yaml() + assert "total_revenue" in yaml_output or "metrics" in yaml_output + + +def test_import_osi_relationships(): + """Test that relationships are properly imported.""" + result = DataContract.import_from_source("osi", "fixtures/osi/import/orders_semantic_model.yaml") + + orders_schema = next(s for s in result.schema_ if s.name == "orders") + customer_id = next(p for p in orders_schema.properties if p.name == "customer_id") + + # Relationship should be converted to relationships list + assert customer_id.relationships is not None + assert len(customer_id.relationships) == 1 + assert customer_id.relationships[0].to == "customers.customer_id" + + +def test_import_osi_simple(): + """Test importing a simple OSI model without relationships or metrics.""" + result = DataContract.import_from_source("osi", 
"fixtures/osi/import/simple_model.yaml") + + assert result.name == "simple_model" + assert len(result.schema_) == 1 + + users_schema = result.schema_[0] + assert users_schema.name == "users" + assert users_schema.physicalName == "public.users" + assert len(users_schema.properties) == 3 + + id_prop = next(p for p in users_schema.properties if p.name == "id") + assert id_prop.primaryKey is True