Skip to content

Commit 9c31539

Browse files
Add files via upload
1 parent f9b8264 commit 9c31539

1 file changed

Lines changed: 387 additions & 0 deletions

File tree

Lines changed: 387 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,387 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {
6+
"application/vnd.databricks.v1+cell": {
7+
"cellMetadata": {},
8+
"inputWidgets": {},
9+
"nuid": "2b77ac5e-1de0-4c91-bcfb-9b9a8650e2d5",
10+
"showTitle": false,
11+
"tableResultSettingsMap": {},
12+
"title": ""
13+
}
14+
},
15+
"source": [
16+
"## Lakehouse Federation \n",
17+
"This notebook contains the examples from this [video](https://youtube.com/shorts/WAB_d5E-Auw) and shows how to query PostgreSQL via Databricks. PostgreSQL is not the only source you can query; as of right now your options are:\n",
18+
"\n",
19+
"* MySQL\n",
20+
"* PostgreSQL\n",
21+
"* Oracle\n",
22+
"* Amazon Redshift\n",
23+
"* Salesforce Data Cloud\n",
24+
"* Snowflake\n",
25+
"* Microsoft SQL Server\n",
26+
"* Azure Synapse (SQL Data Warehouse)\n",
27+
"* Google BigQuery\n",
28+
"* Other Databricks Workspaces\n",
29+
"* Hive metastore \n",
30+
"\n",
31+
"Link to the formal docs [here](https://docs.databricks.com/en/query-federation/index.html)\n",
32+
"\n",
33+
"Compute used: serverless warehouse or classic cluster with runtime 16.1\n"
34+
]
35+
},
36+
{
37+
"cell_type": "code",
38+
"execution_count": 0,
39+
"metadata": {
40+
"application/vnd.databricks.v1+cell": {
41+
"cellMetadata": {},
42+
"inputWidgets": {},
43+
"nuid": "9a7dbbad-bb0b-49cf-b8fd-cd920f9f8aa2",
44+
"showTitle": true,
45+
"tableResultSettingsMap": {},
46+
"title": "Creating a Postgres connection"
47+
}
48+
},
49+
"outputs": [],
50+
"source": [
51+
"CREATE CONNECTION postgres_test TYPE postgresql\n",
52+
"OPTIONS (\n",
53+
" host 'k8s-hadronco-abcdef01-234567890abc-def0123456.elb.us-west-2.amazonaws.com', --not actually real\n",
54+
" port '5432',\n",
55+
" user 'holly',\n",
56+
" password 'password1' --hard-coded for the demo only; read it from a Databricks secret scope in real code\n",
57+
")"
58+
]
59+
},
60+
{
61+
"cell_type": "code",
62+
"execution_count": 0,
63+
"metadata": {
64+
"application/vnd.databricks.v1+cell": {
65+
"cellMetadata": {
66+
"byteLimit": 2048000,
67+
"rowLimit": 10000
68+
},
69+
"inputWidgets": {},
70+
"nuid": "2c1280e2-0307-4029-958c-9469a4ecf47e",
71+
"showTitle": true,
72+
"tableResultSettingsMap": {},
73+
"title": "Create Foreign Catalog"
74+
}
75+
},
76+
"outputs": [],
77+
"source": [
78+
"CREATE FOREIGN CATALOG postgres_test_catalog\n",
79+
"USING CONNECTION postgres_test\n",
80+
"OPTIONS (database 'holly'); --this is the database name in postgres"
81+
]
82+
},
83+
{
84+
"cell_type": "code",
85+
"execution_count": 0,
86+
"metadata": {
87+
"application/vnd.databricks.v1+cell": {
88+
"cellMetadata": {
89+
"byteLimit": 2048000,
90+
"rowLimit": 10000
91+
},
92+
"inputWidgets": {},
93+
"nuid": "129159df-7ad8-4115-8de8-c3e4bb62d25f",
94+
"showTitle": true,
95+
"tableResultSettingsMap": {},
96+
"title": "Query Postgres"
97+
}
98+
},
99+
"outputs": [
100+
{
101+
"output_type": "display_data",
102+
"data": {
103+
"text/html": [
104+
"<style scoped>\n",
105+
" .table-result-container {\n",
106+
" max-height: 300px;\n",
107+
" overflow: auto;\n",
108+
" }\n",
109+
" table, th, td {\n",
110+
" border: 1px solid black;\n",
111+
" border-collapse: collapse;\n",
112+
" }\n",
113+
" th, td {\n",
114+
" padding: 5px;\n",
115+
" }\n",
116+
" th {\n",
117+
" text-align: left;\n",
118+
" }\n",
119+
"</style><div class='table-result-container'><table class='table-result'><thead style='background-color: white'><tr><th>ham_type</th><th>is_edible</th></tr></thead><tbody><tr><td>sandwich</td><td>true</td></tr><tr><td>West</td><td>false</td></tr></tbody></table></div>"
120+
]
121+
},
122+
"metadata": {
123+
"application/vnd.databricks.v1+output": {
124+
"addedWidgets": {},
125+
"aggData": [],
126+
"aggError": "",
127+
"aggOverflow": false,
128+
"aggSchema": [],
129+
"aggSeriesLimitReached": false,
130+
"aggType": "",
131+
"arguments": {},
132+
"columnCustomDisplayInfos": {},
133+
"data": [
134+
[
135+
"sandwich",
136+
true
137+
],
138+
[
139+
"West",
140+
false
141+
]
142+
],
143+
"datasetInfos": [],
144+
"dbfsResultPath": null,
145+
"isJsonSchema": true,
146+
"metadata": {
147+
"isDbfsCommandResult": false
148+
},
149+
"overflow": false,
150+
"plotOptions": {
151+
"customPlotOptions": {},
152+
"displayType": "table",
153+
"pivotAggregation": null,
154+
"pivotColumns": null,
155+
"xColumns": null,
156+
"yColumns": null
157+
},
158+
"removedWidgets": [],
159+
"schema": [
160+
{
161+
"metadata": "{\"isTimestampNTZ\":false,\"signed\":false,\"scale\":0}",
162+
"name": "ham_type",
163+
"type": "\"string\""
164+
},
165+
{
166+
"metadata": "{\"isTimestampNTZ\":false,\"signed\":false,\"scale\":0}",
167+
"name": "is_edible",
168+
"type": "\"boolean\""
169+
}
170+
],
171+
"type": "table"
172+
}
173+
},
174+
"output_type": "display_data"
175+
}
176+
],
177+
"source": [
178+
"SELECT * FROM postgres_test_catalog.federated_schema.ham_types"
179+
]
180+
},
181+
{
182+
"cell_type": "code",
183+
"execution_count": 0,
184+
"metadata": {
185+
"application/vnd.databricks.v1+cell": {
186+
"cellMetadata": {
187+
"byteLimit": 2048000,
188+
"rowLimit": 10000
189+
},
190+
"inputWidgets": {},
191+
"nuid": "d6f12188-4897-4ec2-b5b1-bd0df1db133f",
192+
"showTitle": true,
193+
"tableResultSettingsMap": {},
194+
"title": "Create Materialised View"
195+
}
196+
},
197+
"outputs": [
198+
{
199+
"output_type": "display_data",
200+
"data": {
201+
"text/html": [
202+
"<style scoped>\n",
203+
" .table-result-container {\n",
204+
" max-height: 300px;\n",
205+
" overflow: auto;\n",
206+
" }\n",
207+
" table, th, td {\n",
208+
" border: 1px solid black;\n",
209+
" border-collapse: collapse;\n",
210+
" }\n",
211+
" th, td {\n",
212+
" padding: 5px;\n",
213+
" }\n",
214+
" th {\n",
215+
" text-align: left;\n",
216+
" }\n",
217+
"</style><div class='table-result-container'><table class='table-result'><thead style='background-color: white'><tr><th>result</th></tr></thead><tbody><tr><td>The operation was successfully executed.</td></tr></tbody></table></div>"
218+
]
219+
},
220+
"metadata": {
221+
"application/vnd.databricks.v1+output": {
222+
"addedWidgets": {},
223+
"aggData": [],
224+
"aggError": "",
225+
"aggOverflow": false,
226+
"aggSchema": [],
227+
"aggSeriesLimitReached": false,
228+
"aggType": "",
229+
"arguments": {},
230+
"columnCustomDisplayInfos": {},
231+
"data": [
232+
[
233+
"The operation was successfully executed."
234+
]
235+
],
236+
"datasetInfos": [],
237+
"dbfsResultPath": null,
238+
"isJsonSchema": true,
239+
"metadata": {},
240+
"overflow": false,
241+
"plotOptions": {
242+
"customPlotOptions": {},
243+
"displayType": "table",
244+
"pivotAggregation": null,
245+
"pivotColumns": null,
246+
"xColumns": null,
247+
"yColumns": null
248+
},
249+
"removedWidgets": [],
250+
"schema": [
251+
{
252+
"metadata": "{}",
253+
"name": "result",
254+
"type": "\"string\""
255+
}
256+
],
257+
"type": "table"
258+
}
259+
},
260+
"output_type": "display_data"
261+
}
262+
],
263+
"source": [
264+
"CREATE MATERIALIZED VIEW postgresql_mv\n",
265+
"SCHEDULE EVERY 1 HOUR\n",
266+
"AS SELECT * FROM postgres_test_catalog.federated_schema.ham_types"
267+
]
268+
},
269+
{
270+
"cell_type": "code",
271+
"execution_count": 0,
272+
"metadata": {
273+
"application/vnd.databricks.v1+cell": {
274+
"cellMetadata": {
275+
"byteLimit": 2048000,
276+
"rowLimit": 10000
277+
},
278+
"inputWidgets": {},
279+
"nuid": "8a09b057-0265-4b71-a233-de3da64612bf",
280+
"showTitle": true,
281+
"tableResultSettingsMap": {},
282+
"title": "Append to Bronze Table"
283+
}
284+
},
285+
"outputs": [
286+
{
287+
"output_type": "display_data",
288+
"data": {
289+
"text/html": [
290+
"<style scoped>\n",
291+
" .table-result-container {\n",
292+
" max-height: 300px;\n",
293+
" overflow: auto;\n",
294+
" }\n",
295+
" table, th, td {\n",
296+
" border: 1px solid black;\n",
297+
" border-collapse: collapse;\n",
298+
" }\n",
299+
" th, td {\n",
300+
" padding: 5px;\n",
301+
" }\n",
302+
" th {\n",
303+
" text-align: left;\n",
304+
" }\n",
305+
"</style><div class='table-result-container'><table class='table-result'><thead style='background-color: white'><tr><th>num_affected_rows</th><th>num_inserted_rows</th></tr></thead><tbody><tr><td>2</td><td>2</td></tr></tbody></table></div>"
306+
]
307+
},
308+
"metadata": {
309+
"application/vnd.databricks.v1+output": {
310+
"addedWidgets": {},
311+
"aggData": [],
312+
"aggError": "",
313+
"aggOverflow": false,
314+
"aggSchema": [],
315+
"aggSeriesLimitReached": false,
316+
"aggType": "",
317+
"arguments": {},
318+
"columnCustomDisplayInfos": {},
319+
"data": [
320+
[
321+
2,
322+
2
323+
]
324+
],
325+
"datasetInfos": [],
326+
"dbfsResultPath": null,
327+
"isJsonSchema": true,
328+
"metadata": {},
329+
"overflow": false,
330+
"plotOptions": {
331+
"customPlotOptions": {},
332+
"displayType": "table",
333+
"pivotAggregation": null,
334+
"pivotColumns": null,
335+
"xColumns": null,
336+
"yColumns": null
337+
},
338+
"removedWidgets": [],
339+
"schema": [
340+
{
341+
"metadata": "{}",
342+
"name": "num_affected_rows",
343+
"type": "\"long\""
344+
},
345+
{
346+
"metadata": "{}",
347+
"name": "num_inserted_rows",
348+
"type": "\"long\""
349+
}
350+
],
351+
"type": "table"
352+
}
353+
},
354+
"output_type": "display_data"
355+
}
356+
],
357+
"source": [
358+
"--the empty postgres_bronze table must be created before running this insert\n",
359+
"INSERT INTO postgres_bronze\n",
360+
"SELECT *, --yes I know not best practice\n",
361+
"current_timestamp() as ingest_time\n",
362+
"FROM postgres_test_catalog.federated_schema.ham_types"
363+
]
364+
}
365+
],
366+
"metadata": {
367+
"application/vnd.databricks.v1+notebook": {
368+
"computePreferences": null,
369+
"dashboards": [],
370+
"environmentMetadata": {
371+
"base_environment": "",
372+
"client": "2"
373+
},
374+
"language": "sql",
375+
"notebookMetadata": {
376+
"pythonIndentUnit": 4
377+
},
378+
"notebookName": "🤝 Connect to more data with Lakehouse Federation",
379+
"widgets": {}
380+
},
381+
"language_info": {
382+
"name": "sql"
383+
}
384+
},
385+
"nbformat": 4,
386+
"nbformat_minor": 0
387+
}

0 commit comments

Comments
 (0)