Skip to content

Commit cd7e028

Browse files
authored
Merge branch 'datacontract:main' into main
2 parents 1cd452a + 3e3a91d commit cd7e028

6 files changed

Lines changed: 102 additions & 5 deletions

File tree

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## Unreleased
99

10+
### Fixed
11+
12+
- Import composite primary keys from Open Data Contract Standard (ODCS) data contracts
13+
14+
## [0.10.38] - 2025-11-11
15+
1016
### Added
1117

1218
- Support for Oracle Database (>= 19C)
1319

1420
### Fixed
1521

22+
- Athena: Now correctly uses the (optional) AWS session token specified in the `DATACONTRACT_S3_SESSION_TOKEN` environment variable when testing contracts (#934)
1623

1724
## [0.10.37] - 2025-11-03
1825

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ s# Data Contract CLI
88
<a href="https://datacontract.com/slack" rel="nofollow"><img src="https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&amp;style=social" alt="Slack Status" data-canonical-src="https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&amp;style=social" style="max-width: 100%;"></a>
99
</p>
1010

11-
The `datacontract` CLI is an open-source command-line tool for working with data contracts.
11+
The `datacontract` CLI is a popular and [recognized](https://www.thoughtworks.com/en-de/radar/tools/summary/data-contract-cli) open-source command-line tool for working with data contracts.
1212
It uses data contract YAML files as [Data Contract Specification](https://datacontract.com/) or [ODCS](https://bitol-io.github.io/open-data-contract-standard/latest/) to lint the data contract, connect to data sources and execute schema and quality tests, detect breaking changes, and export to different formats. The tool is written in Python. It can be used as a standalone CLI tool, in a CI/CD pipeline, or directly as a Python library.
1313

1414
![Main features of the Data Contract CLI](datacontractcli.png)

datacontract/engines/soda/connections/athena.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ def to_athena_soda_configuration(server):
7171
data_source["catalog"] = server.catalog
7272

7373
if s3_session_token:
74-
data_source["aws_session_token"] = s3_session_token
74+
data_source["session_token"] = s3_session_token
7575

7676
soda_configuration = {f"data_source {server.type}": data_source}
7777

datacontract/imports/odcs_v3_importer.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,17 @@ def get_server_type(odcs: OpenDataContractStandard) -> str | None:
197197
return server.type
198198

199199

200+
def get_composite_primary_keys(properties: List[SchemaProperty]) -> list[str]:
    """Return the names of the primary-key properties, ordered by key position.

    A property contributes to the key when it has a non-empty ``name`` and a
    truthy ``primaryKey`` flag. Names are ordered by ``primaryKeyPosition``
    ascending. A missing (``None``) or zero position is treated as ``-1`` and
    therefore sorts ahead of every explicitly positioned key. The sort is
    stable, so properties sharing a position keep their declaration order.

    Args:
        properties: Schema properties of a single ODCS schema object.

    Returns:
        The primary-key property names in key order; empty if none qualify.
    """
    # Resolve the fallback position once while collecting, so the sort key is
    # a plain tuple lookup. `prop` (not `property`) avoids shadowing the builtin.
    keyed = [
        (prop.name, prop.primaryKeyPosition or -1)
        for prop in properties
        if prop.name and prop.primaryKey
    ]
    keyed.sort(key=lambda entry: entry[1])
    return [name for name, _ in keyed]
209+
210+
200211
def import_models(odcs: Any) -> Dict[str, Model]:
201212
custom_type_mappings = get_custom_type_mappings(odcs.customProperties)
202213

@@ -214,6 +225,8 @@ def import_models(odcs: Any) -> Dict[str, Model]:
214225
tags=odcs_schema.tags if odcs_schema.tags is not None else None,
215226
)
216227
model.fields = import_fields(odcs_schema.properties, custom_type_mappings, server_type=get_server_type(odcs))
228+
if has_composite_primary_key(odcs_properties=odcs_schema.properties):
229+
model.primaryKey = get_composite_primary_keys(odcs_schema.properties)
217230
if odcs_schema.quality is not None:
218231
model.quality = convert_quality_list(odcs_schema.quality)
219232
model.title = schema_name

pyproject.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "datacontract-cli"
3-
version = "0.10.37"
3+
version = "0.10.38"
44
description = "The datacontract CLI is an open source command-line tool for working with Data Contracts. It uses data contract YAML files to lint the data contract, connect to data sources and execute schema and quality tests, detect breaking changes, and export to different formats. The tool is written in Python. It can be used as a standalone CLI tool, in a CI/CD pipeline, or directly as a Python library."
55
license = "MIT"
66
readme = "README.md"
@@ -60,7 +60,7 @@ databricks = [
6060
"soda-core-spark-df>=3.3.20,<3.6.0",
6161
"soda-core-spark[databricks]>=3.3.20,<3.6.0",
6262
"databricks-sql-connector>=3.7.0,<4.2.0",
63-
"databricks-sdk<0.68.0",
63+
"databricks-sdk<0.74.0",
6464
"pyspark>=3.5.5,<4.0.0",
6565
]
6666

@@ -140,7 +140,7 @@ dev = [
140140
"kafka-python",
141141
"moto==5.1.13",
142142
"pandas>=2.1.0",
143-
"pre-commit>=3.7.1,<4.4.0",
143+
"pre-commit>=3.7.1,<4.5.0",
144144
"pytest",
145145
"pytest-xdist",
146146
"pymssql==2.3.8",

tests/fixtures/odcs_v3/adventureworks.datacontract.yml

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,11 @@ models:
239239
criticalDataElement: false
240240
partitioned: false
241241
physicalType: timestamp
242+
primaryKey:
243+
- businessentityid
244+
- departmentid
245+
- shiftid
246+
- startdate
242247
employeepayhistory:
243248
description: Employee pay history.
244249
type: table
@@ -283,6 +288,9 @@ models:
283288
criticalDataElement: false
284289
partitioned: false
285290
physicalType: timestamp
291+
primaryKey:
292+
- businessentityid
293+
- ratechangedate
286294
jobcandidate:
287295
description: Résumés submitted to Human Resources by job applicants.
288296
type: table
@@ -572,6 +580,10 @@ models:
572580
criticalDataElement: false
573581
partitioned: false
574582
physicalType: timestamp
583+
primaryKey:
584+
- businessentityid
585+
- addressid
586+
- addresstypeid
575587
businessentitycontact:
576588
description: Cross-reference table mapping stores, vendors, and employees to people
577589
type: table
@@ -615,6 +627,10 @@ models:
615627
criticalDataElement: false
616628
partitioned: false
617629
physicalType: timestamp
630+
primaryKey:
631+
- businessentityid
632+
- personid
633+
- contacttypeid
618634
contacttype:
619635
description: Lookup table containing the types of business entity contacts.
620636
type: table
@@ -722,6 +738,9 @@ models:
722738
criticalDataElement: false
723739
partitioned: false
724740
physicalType: timestamp
741+
primaryKey:
742+
- businessentityid
743+
- emailaddressid
725744
password:
726745
description: One way hashed authentication information
727746
type: table
@@ -936,6 +955,10 @@ models:
936955
criticalDataElement: false
937956
partitioned: false
938957
physicalType: timestamp
958+
primaryKey:
959+
- businessentityid
960+
- phonenumber
961+
- phonenumbertypeid
939962
phonenumbertype:
940963
description: Type of phone number of a person.
941964
type: table
@@ -1677,6 +1700,9 @@ models:
16771700
criticalDataElement: false
16781701
partitioned: false
16791702
physicalType: timestamp
1703+
primaryKey:
1704+
- productid
1705+
- startdate
16801706
productdescription:
16811707
description: Product descriptions in several languages.
16821708
type: table
@@ -1744,6 +1770,9 @@ models:
17441770
criticalDataElement: false
17451771
partitioned: false
17461772
physicalType: varchar[2147483647]
1773+
primaryKey:
1774+
- productid
1775+
- documentnode
17471776
productinventory:
17481777
description: Product inventory information.
17491778
type: table
@@ -1803,6 +1832,9 @@ models:
18031832
criticalDataElement: false
18041833
partitioned: false
18051834
physicalType: timestamp
1835+
primaryKey:
1836+
- productid
1837+
- locationid
18061838
productlistpricehistory:
18071839
description: Changes in the list price of a product over time.
18081840
type: table
@@ -1847,6 +1879,9 @@ models:
18471879
criticalDataElement: false
18481880
partitioned: false
18491881
physicalType: timestamp
1882+
primaryKey:
1883+
- productid
1884+
- startdate
18501885
productmodel:
18511886
description: Product model classification.
18521887
type: table
@@ -1932,6 +1967,9 @@ models:
19321967
criticalDataElement: false
19331968
partitioned: false
19341969
physicalType: timestamp
1970+
primaryKey:
1971+
- productmodelid
1972+
- illustrationid
19351973
productmodelproductdescriptionculture:
19361974
description: Cross-reference table mapping product descriptions and the language
19371975
the description is written in.
@@ -1969,6 +2007,10 @@ models:
19692007
criticalDataElement: false
19702008
partitioned: false
19712009
physicalType: timestamp
2010+
primaryKey:
2011+
- productmodelid
2012+
- productdescriptionid
2013+
- cultureid
19722014
productphoto:
19732015
description: Product images.
19742016
type: table
@@ -2064,6 +2106,9 @@ models:
20642106
criticalDataElement: false
20652107
partitioned: false
20662108
physicalType: timestamp
2109+
primaryKey:
2110+
- productid
2111+
- productphotoid
20672112
productreview:
20682113
description: Customer reviews of products they have purchased.
20692114
type: table
@@ -2610,6 +2655,10 @@ models:
26102655
criticalDataElement: false
26112656
partitioned: false
26122657
physicalType: timestamp
2658+
primaryKey:
2659+
- workorderid
2660+
- productid
2661+
- operationsequence
26132662
productvendor:
26142663
description: Cross-reference table mapping vendors with the products they supply.
26152664
type: table
@@ -2703,6 +2752,9 @@ models:
27032752
criticalDataElement: false
27042753
partitioned: false
27052754
physicalType: timestamp
2755+
primaryKey:
2756+
- productid
2757+
- businessentityid
27062758
purchaseorderdetail:
27072759
description: Individual products associated with a specific purchase order. See
27082760
PurchaseOrderHeader.
@@ -2780,6 +2832,9 @@ models:
27802832
criticalDataElement: false
27812833
partitioned: false
27822834
physicalType: timestamp
2835+
primaryKey:
2836+
- purchaseorderid
2837+
- purchaseorderdetailid
27832838
purchaseorderheader:
27842839
description: General purchase order information. See PurchaseOrderDetail.
27852840
type: table
@@ -3061,6 +3116,9 @@ models:
30613116
criticalDataElement: false
30623117
partitioned: false
30633118
physicalType: timestamp
3119+
primaryKey:
3120+
- countryregioncode
3121+
- currencycode
30643122
creditcard:
30653123
description: Customer credit card information.
30663124
type: table
@@ -3304,6 +3362,9 @@ models:
33043362
criticalDataElement: false
33053363
partitioned: false
33063364
physicalType: timestamp
3365+
primaryKey:
3366+
- businessentityid
3367+
- creditcardid
33073368
salesorderdetail:
33083369
description: Individual products associated with a specific sales order. See SalesOrderHeader.
33093370
type: table
@@ -3387,6 +3448,9 @@ models:
33873448
criticalDataElement: false
33883449
partitioned: false
33893450
physicalType: timestamp
3451+
primaryKey:
3452+
- salesorderid
3453+
- salesorderdetailid
33903454
salesorderheader:
33913455
description: General sales order information.
33923456
type: table
@@ -3646,6 +3710,9 @@ models:
36463710
criticalDataElement: false
36473711
partitioned: false
36483712
physicalType: timestamp
3713+
primaryKey:
3714+
- salesorderid
3715+
- salesreasonid
36493716
salesperson:
36503717
description: Sales representative current information.
36513718
type: table
@@ -3773,6 +3840,9 @@ models:
37733840
criticalDataElement: false
37743841
partitioned: false
37753842
physicalType: timestamp
3843+
primaryKey:
3844+
- businessentityid
3845+
- quotadate
37763846
salesreason:
37773847
description: Lookup table of customer purchase reasons.
37783848
type: table
@@ -4026,6 +4096,10 @@ models:
40264096
criticalDataElement: false
40274097
partitioned: false
40284098
physicalType: timestamp
4099+
primaryKey:
4100+
- businessentityid
4101+
- territoryid
4102+
- startdate
40294103
shoppingcartitem:
40304104
description: Contains online customer orders until the order is submitted or cancelled.
40314105
type: table
@@ -4221,6 +4295,9 @@ models:
42214295
criticalDataElement: false
42224296
partitioned: false
42234297
physicalType: timestamp
4298+
primaryKey:
4299+
- specialofferid
4300+
- productid
42244301
store:
42254302
description: Customers (resellers) of Adventure Works products.
42264303
type: table

0 commit comments

Comments
 (0)