diff --git a/datacontract/export/odcs_export_helper.py b/datacontract/export/odcs_export_helper.py index 722c5a651..70b67169c 100644 --- a/datacontract/export/odcs_export_helper.py +++ b/datacontract/export/odcs_export_helper.py @@ -73,10 +73,21 @@ def get_property_config(prop: SchemaProperty, key: str) -> Optional[Any]: def get_logical_type_option(prop: SchemaProperty, key: str) -> Optional[Any]: - """Get a logical type option from a SchemaProperty.""" - if prop.logicalTypeOptions is None: - return None - return prop.logicalTypeOptions.get(key) + """Get a logical type option from a SchemaProperty. + + Checks logicalTypeOptions first, then customProperties as fallback. + This is needed because precision and scale are stored in customProperties + rather than logicalTypeOptions (which does not allow them for number type per ODCS schema). + """ + if prop.logicalTypeOptions is not None: + val = prop.logicalTypeOptions.get(key) + if val is not None: + return val + if prop.customProperties is not None: + for cp in prop.customProperties: + if cp.property == key: + return cp.value + return None def iter_properties(schema: SchemaObject) -> List[Tuple[str, SchemaProperty]]: diff --git a/datacontract/export/sqlalchemy_exporter.py b/datacontract/export/sqlalchemy_exporter.py index ca0240660..1bfc64d24 100644 --- a/datacontract/export/sqlalchemy_exporter.py +++ b/datacontract/export/sqlalchemy_exporter.py @@ -35,10 +35,16 @@ def _get_type(prop: SchemaProperty) -> Optional[str]: def _get_logical_type_option(prop: SchemaProperty, key: str): - """Get a logical type option value.""" - if prop.logicalTypeOptions is None: - return None - return prop.logicalTypeOptions.get(key) + """Get a logical type option value, checking logicalTypeOptions and customProperties.""" + if prop.logicalTypeOptions is not None: + val = prop.logicalTypeOptions.get(key) + if val is not None: + return val + if prop.customProperties is not None: + for cp in prop.customProperties: + if cp.property == key: + 
return cp.value + return None def to_sqlalchemy_model_str(odcs: OpenDataContractStandard, sql_server_type: str = "", server=None) -> str: diff --git a/datacontract/imports/odcs_helper.py b/datacontract/imports/odcs_helper.py index 4c5a27f35..fb494ffcd 100644 --- a/datacontract/imports/odcs_helper.py +++ b/datacontract/imports/odcs_helper.py @@ -119,18 +119,21 @@ def create_property( logical_type_options["exclusiveMinimum"] = exclusive_minimum if exclusive_maximum is not None: logical_type_options["exclusiveMaximum"] = exclusive_maximum - if precision is not None: - logical_type_options["precision"] = precision - if scale is not None: - logical_type_options["scale"] = scale if format: logical_type_options["format"] = format if logical_type_options: prop.logicalTypeOptions = logical_type_options # Custom properties - if custom_properties: - prop.customProperties = [CustomProperty(property=k, value=v) for k, v in custom_properties.items()] + # Note: precision and scale are stored in customProperties because the ODCS schema + # does not allow them in logicalTypeOptions for the "number" logical type. 
+ merged_custom = dict(custom_properties) if custom_properties else {} + if precision is not None: + merged_custom["precision"] = precision + if scale is not None: + merged_custom["scale"] = scale + if merged_custom: + prop.customProperties = [CustomProperty(property=k, value=v) for k, v in merged_custom.items()] return prop diff --git a/datacontract/imports/sql_importer.py b/datacontract/imports/sql_importer.py index 81673b299..cdd3ea05c 100644 --- a/datacontract/imports/sql_importer.py +++ b/datacontract/imports/sql_importer.py @@ -60,7 +60,6 @@ def import_sql(format: str, source: str, import_args: dict = None) -> OpenDataCo logical_type = map_type_from_sql(col_type) col_description = get_description(column) max_length = get_max_length(column) - precision, scale = get_precision_scale(column) is_primary_key = get_primary_key(column) is_required = column.find(sqlglot.exp.NotNullColumnConstraint) is not None or None @@ -70,8 +69,6 @@ def import_sql(format: str, source: str, import_args: dict = None) -> OpenDataCo physical_type=col_type, description=col_description, max_length=max_length, - precision=precision, - scale=scale, primary_key=is_primary_key, primary_key_position=primary_key_position if is_primary_key else None, required=is_required if is_required else None, diff --git a/tests/fixtures/glue/datacontract.yaml b/tests/fixtures/glue/datacontract.yaml index 0080e5c6f..28a8ae1be 100644 --- a/tests/fixtures/glue/datacontract.yaml +++ b/tests/fixtures/glue/datacontract.yaml @@ -29,9 +29,11 @@ schema: - name: field_four physicalType: decimal logicalType: number - logicalTypeOptions: - precision: 6 - scale: 2 + customProperties: + - property: precision + value: 6 + - property: scale + value: 2 - name: field_five physicalType: struct logicalType: object diff --git a/tests/fixtures/oracle/import/ddl_with_precision.sql b/tests/fixtures/oracle/import/ddl_with_precision.sql new file mode 100644 index 000000000..2782682ed --- /dev/null +++ 
b/tests/fixtures/oracle/import/ddl_with_precision.sql @@ -0,0 +1,11 @@ +-- Test Oracle DDL with NUMBER(precision, scale) types +-- https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/Data-Types.html + +CREATE TABLE customers +( + customer_id NUMBER(9, 0), -- Integer-like number with precision + customer_score NUMBER(5, 2), -- Decimal number with precision and scale + balance NUMBER(15, 4), -- Large decimal number + amount NUMBER(10), -- Number with only precision + plain_number NUMBER -- Plain number without precision or scale +) diff --git a/tests/test_import_avro.py b/tests/test_import_avro.py index 45c35b5f6..9acf3c60f 100644 --- a/tests/test_import_avro.py +++ b/tests/test_import_avro.py @@ -370,9 +370,11 @@ def test_import_avro_logical_types(): - name: some_bytes_decimal physicalType: bytes logicalType: number - logicalTypeOptions: - precision: 25 - scale: 2 + customProperties: + - property: precision + value: 25 + - property: scale + value: 2 required: true """ print("Result:\n", result.to_yaml()) diff --git a/tests/test_import_parquet.py b/tests/test_import_parquet.py index d9f78a88a..3196155d3 100644 --- a/tests/test_import_parquet.py +++ b/tests/test_import_parquet.py @@ -47,10 +47,12 @@ def test_import_parquet(): logicalType: boolean - name: decimal_field physicalType: DECIMAL + customProperties: + - property: precision + value: 10 + - property: scale + value: 2 logicalType: number - logicalTypeOptions: - precision: 10 - scale: 2 - name: float_field physicalType: FLOAT logicalType: number diff --git a/tests/test_import_sql_oracle.py b/tests/test_import_sql_oracle.py index 37ffc5604..4a6e7974c 100644 --- a/tests/test_import_sql_oracle.py +++ b/tests/test_import_sql_oracle.py @@ -228,3 +228,57 @@ def test_import_sql_constraints(): """ print("Result", result.to_yaml()) assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected) + + +def test_import_sql_oracle_number_precision_in_custom_properties(): + """Test that Oracle NUMBER(p, s) 
types keep precision/scale out of logicalTypeOptions; the SQL importer records them only in physicalType.""" + result = DataContract.import_from_source( + "sql", "fixtures/oracle/import/ddl_with_precision.sql", dialect="oracle" + ) + + expected = """ +apiVersion: v3.1.0 +kind: DataContract +id: my-data-contract +name: My Data Contract +version: 1.0.0 +status: draft +servers: + - server: oracle + type: oracle +schema: + - name: customers + physicalType: table + logicalType: object + physicalName: customers + properties: + - name: customer_id + logicalType: number + physicalType: NUMBER(9, 0) + description: Integer-like number with precision + - name: customer_score + logicalType: number + physicalType: NUMBER(5, 2) + description: Decimal number with precision and scale + - name: balance + logicalType: number + physicalType: NUMBER(15, 4) + description: Large decimal number + - name: amount + logicalType: number + physicalType: NUMBER(10) + description: Number with only precision + - name: plain_number + logicalType: number + physicalType: NUMBER + description: Plain number without precision or scale + """ + print("Result", result.to_yaml()) + assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected) + + # Verify precision and scale are NOT in logicalTypeOptions (which would violate ODCS schema) + parsed = yaml.safe_load(result.to_yaml()) + for prop in parsed["schema"][0]["properties"]: + log_type_opts = prop.get("logicalTypeOptions", {}) + assert "precision" not in log_type_opts, f"precision must not be in logicalTypeOptions for {prop['name']}" + assert "scale" not in log_type_opts, f"scale must not be in logicalTypeOptions for {prop['name']}" diff --git a/tests/test_import_sql_sqlserver.py b/tests/test_import_sql_sqlserver.py index 479421856..56599af3a 100644 --- a/tests/test_import_sql_sqlserver.py +++ b/tests/test_import_sql_sqlserver.py @@ -85,16 +85,10 @@ def test_import_sql_sqlserver(): description: Large integer (-9 quintillion to 9 quintillion) - name: field_decimal logicalType: 
number - logicalTypeOptions: - precision: 10 - scale: 2 physicalType: NUMERIC(10, 2) description: Fixed precision decimal - name: field_numeric logicalType: number - logicalTypeOptions: - precision: 10 - scale: 2 physicalType: NUMERIC(10, 2) description: Same as DECIMAL - name: field_float