Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1104,6 +1104,7 @@ Available export options:
| `html` | Export to HTML | ✅ |
| `jsonschema` | Export to JSON Schema | ✅ |
| `odcs` | Export to Open Data Contract Standard (ODCS) V3 | ✅ |
| `osi` | Export to Open Semantic Interchange (OSI) format | ✅ |
| `sodacl` | Export to SodaCL quality checks in YAML format | ✅ |
| `dbt` | Export to dbt models in YAML format | ✅ |
| `dbt-sources` | Export to dbt sources in YAML format | ✅ |
Expand Down Expand Up @@ -1530,6 +1531,7 @@ Available import options:
| `glue` | Import from AWS Glue DataCatalog | ✅ |
| `iceberg` | Import from an Iceberg JSON Schema Definition | partial |
| `jsonschema` | Import from JSON Schemas | ✅ |
| `osi` | Import from Open Semantic Interchange (OSI) | ✅ |
| `parquet` | Import from Parquet File Metadata | ✅ |
| `protobuf` | Import from Protobuf schemas | ✅ |
| `spark` | Import from Spark StructTypes, Variant | ✅ |
Expand Down Expand Up @@ -1628,6 +1630,22 @@ datacontract import --format glue --source <database_name> --glue-table <table_n
datacontract import --format glue --source <database_name>
```

#### OSI (Open Semantic Interchange)

Import from and export to [Open Semantic Interchange (OSI)](https://github.com/open-semantic-interchange/OSI) semantic models. OSI is a vendor-neutral standard for exchanging semantic models across BI, AI, and data analytics tools.

Examples:

```bash
# Example import from OSI semantic model
datacontract import --format osi --source semantic_model.yaml
```

```bash
# Example export to OSI semantic model
datacontract export --format osi datacontract.yaml
```

#### Spark

Importing from Spark tables or views: these must be created or accessible in the Spark context. Specify the list of tables in the `source` parameter. If the `source` tables are registered as tables in Databricks and have table-level descriptions, those descriptions will also be added to the Data Contract Specification.
Expand Down
1 change: 1 addition & 0 deletions datacontract/export/exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ class ExportFormat(str, Enum):
dbt_sources = "dbt-sources"
dbt_staging_sql = "dbt-staging-sql"
odcs = "odcs"
osi = "osi"
rdf = "rdf"
avro = "avro"
protobuf = "protobuf"
Expand Down
5 changes: 5 additions & 0 deletions datacontract/export/exporter_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,3 +208,8 @@ def load_module_class(module_path, class_name):
exporter_factory.register_lazy_exporter(
name=ExportFormat.excel, module_path="datacontract.export.excel_exporter", class_name="ExcelExporter"
)
# Register the OSI exporter under ExportFormat.osi. The exporter is identified
# by module path and class name rather than by the class object itself.
# NOTE(review): presumably the module is only imported when the format is
# first requested -- confirm in register_lazy_exporter.
exporter_factory.register_lazy_exporter(
name=ExportFormat.osi,
module_path="datacontract.export.osi_exporter",
class_name="OsiExporter",
)
164 changes: 164 additions & 0 deletions datacontract/export/osi_exporter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
"""Exporter for Open Semantic Interchange (OSI) format."""

from typing import Any, Dict, List, Optional

import yaml
from open_data_contract_standard.model import OpenDataContractStandard, SchemaObject, SchemaProperty

from datacontract.export.exporter import Exporter


class OsiExporter(Exporter):
    """Exporter that renders a data contract as an OSI semantic model."""

    def export(
        self,
        data_contract: OpenDataContractStandard,
        schema_name: str,
        server: str,
        sql_server_type: str,
        export_args: dict,
    ) -> str:
        """Return the OSI YAML document for ``data_contract``.

        Only ``data_contract`` is used; ``schema_name``, ``server``,
        ``sql_server_type`` and ``export_args`` are accepted but ignored.
        """
        rendered = export_osi(data_contract)
        return rendered


def export_osi(data_contract: OpenDataContractStandard) -> str:
    """Serialize an ODCS data contract as an OSI semantic model in YAML.

    The converted model is wrapped under a top-level ``semantic_model`` key and
    dumped in block style, keeping keys in insertion order and allowing
    unicode characters in the output.
    """
    document = {"semantic_model": convert_odcs_to_osi(data_contract)}
    return yaml.dump(
        document,
        default_flow_style=False,
        sort_keys=False,
        allow_unicode=True,
    )


def convert_odcs_to_osi(data_contract: OpenDataContractStandard) -> Dict[str, Any]:
    """Build the OSI semantic model dictionary for an ODCS data contract.

    The model name is the contract ``id``, then ``name``, then the literal
    ``"unnamed_model"``. The description is taken from ``description.purpose``
    when present, or from ``description`` itself when it is a plain string.
    Every schema becomes one dataset; ``relationships`` is only emitted when
    at least one relationship was found.
    """
    result = {"name": data_contract.id or data_contract.name or "unnamed_model"}

    summary = data_contract.description
    if summary:
        purpose = getattr(summary, "purpose", None)
        if purpose:
            result["description"] = purpose
        elif isinstance(summary, str):
            result["description"] = summary

    converted = []
    links = []
    for entry in data_contract.schema_ or []:
        converted.append(convert_schema_to_dataset(entry))
        # Relationships are declared on properties, so gather them per schema.
        links += extract_relationships_from_schema(entry)

    result["datasets"] = converted
    if links:
        result["relationships"] = links
    return result


def convert_schema_to_dataset(schema: SchemaObject) -> Dict[str, Any]:
    """Build the OSI dataset dictionary for one ODCS schema object.

    Keys are inserted in the order ``name``, ``source``, ``primary_key``,
    ``unique_keys``, ``description``, ``fields``; the optional ones are left
    out when there is nothing to report.
    """
    members = schema.properties or []
    result = {"name": schema.name, "source": schema.physicalName or schema.name}

    # Primary-key columns ordered by declared position; columns without a
    # position sort last (999) and keep their declaration order.
    positioned = sorted(
        ((member.primaryKeyPosition or 999, member.name) for member in members if member.primaryKey),
        key=lambda pair: pair[0],
    )
    if positioned:
        result["primary_key"] = [column for _, column in positioned]

    # Unique columns that are not part of the primary key, one key per column.
    single_column_keys = [[member.name] for member in members if member.unique and not member.primaryKey]
    if single_column_keys:
        result["unique_keys"] = single_column_keys

    if schema.description:
        result["description"] = schema.description

    if members:
        result["fields"] = [convert_property_to_field(member) for member in members]

    return result


def convert_property_to_field(prop: SchemaProperty) -> Dict[str, Any]:
    """Convert an ODCS SchemaProperty into an OSI field dictionary.

    The field always carries an ``ANSI_SQL`` dialect whose expression is the
    property's physical name (falling back to its name). Additional dialects
    stored in an ``osi_dialects`` custom property are appended after it.

    Args:
        prop: Property to convert. Reads ``name``, ``physicalName``,
            ``customProperties``, ``logicalType``, ``description`` and
            ``businessName``.

    Returns:
        A dict with ``name`` and ``expression`` plus, when applicable,
        ``dimension``, ``description`` and ``label`` (in that insertion order).
    """
    dialects: List[Any] = [
        {
            "dialect": "ANSI_SQL",
            "expression": prop.physicalName or prop.name,
        }
    ]
    field: Dict[str, Any] = {
        "name": prop.name,
        "expression": {"dialects": dialects},
    }

    # Add other dialects from custom properties
    for custom in prop.customProperties or []:
        if custom.property != "osi_dialects" or not custom.value:
            continue
        extra = custom.value
        if isinstance(extra, dict):
            # A single dialect entry stored without a wrapping list.
            dialects.append(extra)
        elif isinstance(extra, (list, tuple)):
            dialects.extend(extra)
        # Any other value (e.g. a plain string) is not a dialect entry;
        # list.extend() would split it into single characters, so skip it.

    # Add dimension for time types
    if prop.logicalType in ["date", "timestamp", "datetime"]:
        field["dimension"] = {"is_time": True}

    # Add description
    if prop.description:
        field["description"] = prop.description

    # Add label from businessName
    if prop.businessName:
        field["label"] = prop.businessName

    return field


def extract_relationships_from_schema(schema: SchemaObject) -> List[Dict[str, Any]]:
    """Extract foreign key relationships from schema properties.

    Each property relationship whose ``to`` reference is a dotted string of
    the form ``"target_table.target_column"`` becomes one OSI relationship
    from this schema to the target table. Only the first two dot-separated
    segments are used.

    References that cannot be resolved this way are skipped instead of
    raising or producing empty names: a missing ``to``, a non-string ``to``
    (for example a list), a reference without a dot, or a reference with a
    blank table or column segment.

    Args:
        schema: Schema whose ``properties`` (and their ``relationships``)
            are inspected.

    Returns:
        A list of relationship dicts with the keys ``name``, ``from``, ``to``,
        ``from_columns`` and ``to_columns``; empty when nothing was found.
    """
    relationships: List[Dict[str, Any]] = []

    for prop in schema.properties or []:
        for rel_obj in prop.relationships or []:
            target = rel_obj.to
            if not isinstance(target, str):
                # None or a non-string reference: nothing to split.
                continue

            # Parse reference from 'to' field: "target_table.target_column"
            parts = target.split(".")
            if len(parts) < 2 or not parts[0] or not parts[1]:
                continue
            to_table, to_column = parts[0], parts[1]

            relationships.append(
                {
                    "name": f"{schema.name}_{prop.name}_to_{to_table}",
                    "from": schema.name,
                    "to": to_table,
                    "from_columns": [prop.name],
                    "to_columns": [to_column],
                }
            )

    return relationships
1 change: 1 addition & 0 deletions datacontract/imports/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ class ImportFormat(str, Enum):
json = "json"
bigquery = "bigquery"
odcs = "odcs"
osi = "osi"
unity = "unity"
spark = "spark"
iceberg = "iceberg"
Expand Down
5 changes: 5 additions & 0 deletions datacontract/imports/importer_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,3 +126,8 @@ def load_module_class(module_path, class_name):
module_path="datacontract.imports.json_importer",
class_name="JsonImporter",
)
# Register the OSI importer under ImportFormat.osi. The importer is identified
# by module path and class name rather than by the class object itself.
# NOTE(review): presumably the module is only imported when the format is
# first requested -- confirm in register_lazy_importer.
importer_factory.register_lazy_importer(
name=ImportFormat.osi,
module_path="datacontract.imports.osi_importer",
class_name="OsiImporter",
)
Loading
Loading