diff --git a/.gitignore b/.gitignore index e567857..4a61ab7 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ .jupyter_cache jupyter_execute uv.lock +nyc_yellow_taxi_2025-01.parquet diff --git a/content/01_table_dataframe.svg b/content/01_table_dataframe.svg new file mode 100644 index 0000000..9bd1c21 --- /dev/null +++ b/content/01_table_dataframe.svg @@ -0,0 +1,262 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + column + DataFrame + + + row + + + diff --git a/content/benchmarking.md b/content/benchmarking.md new file mode 100644 index 0000000..1dad901 --- /dev/null +++ b/content/benchmarking.md @@ -0,0 +1,80 @@ +# Benchmarking + +:::{questions} + +- What syntax is used to make a lesson? +- How do you structure a lesson effectively for teaching? +- `questions` are at the top of a lesson and provide a starting + point for what you might learn. It is usually a bulleted list. +::: + +:::{objectives} + +- Show a complete lesson page with all of the most common + structures. +- ... + +This is also a holdover from the carpentries-style. It could +usually be left off. +::: + +The introduction should be a high level overview of what is on the +page and why it is interesting. + +The lines below (only in the source) will set the default highlighting +language for the entire page. + +:::{highlight} python +::: + +## Section + +A section. + +:::{discussion} +Discuss the following. + +- A discussion section +- Another discussion topic +::: + +## Section + +``` +print("hello world") +# This uses the default highlighting language +``` + +```python +print("hello world) +``` + +## Exercises: description + +:::{exercise} Exercise Topic-1: imperative description of exercise +Exercise text here. +::: + +:::{solution} +Solution text here +::: + +## Summary + +A Summary of what you learned and why it might be useful. Maybe a +hint of what comes next. 
+ +## See also + +- Other relevant links +- Other link + +:::{keypoints} + +- What the learner should take away +- point 2 +- ... + +This is another holdover from the carpentries style. This perhaps +is better done in a "summary" section. +::: diff --git a/content/conf.py b/content/conf.py index 7c33d39..bd76f25 100644 --- a/content/conf.py +++ b/content/conf.py @@ -14,14 +14,14 @@ # -- Project information ----------------------------------------------------- # FIXME: choose title -project = "Your lesson name" +project = "Python for High Performance Data Analytics" # FIXME: insert correct author -author = "The contributors" +author = "Francesco Fiusco, Qiang Li, Ashwin Mohanan, Juan Triviño, Yonglei Wang" copyright = f"2025, ENCCS, {author}" # FIXME: github organization / user that the repository belongs to github_user = "ENCCS" -github_repo_name = "" # auto-detected from dirname if blank +github_repo_name = "python-for-hpda" # auto-detected from dirname if blank github_version = "main" conf_py_path = "/content/" # with leading and trailing slash diff --git a/content/dask.md b/content/dask.md new file mode 100644 index 0000000..222131e --- /dev/null +++ b/content/dask.md @@ -0,0 +1,93 @@ +# Dask + +:::{questions} +- What syntax is used to make a lesson? +- How do you structure a lesson effectively for teaching? +- `questions` are at the top of a lesson and provide a starting + point for what you might learn. It is usually a bulleted list. +::: + +:::{objectives} +- Show a complete lesson page with all of the most common + structures. +- ... + +This is also a holdover from the carpentries-style. It could +usually be left off. +::: + + + + +The introduction should be a high level overview of what is on the +page and why it is interesting. + + +The lines below (only in the source) will set the default highlighting +language for the entire page. + +:::{highlight} python +::: + + + +## Section + +A section. + +:::{discussion} +Discuss the following. 
+ +- A discussion section +- Another discussion topic +::: + + + +## Section + +``` +print("hello world") +# This uses the default highlighting language +``` + +```python +print("hello world) +``` + + + +## Exercises: description + +:::{exercise} Exercise Topic-1: imperative description of exercise +Exercise text here. +::: + +:::{solution} +Solution text here +::: + + + +## Summary + +A Summary of what you learned and why it might be useful. Maybe a +hint of what comes next. + + + +## See also + +- Other relevant links +- Other link + + + +:::{keypoints} +- What the learner should take away +- point 2 +- ... + +This is another holdover from the carpentries style. This perhaps +is better done in a "summary" section. +::: diff --git a/content/groupby.png b/content/groupby.png new file mode 100644 index 0000000..4786855 Binary files /dev/null and b/content/groupby.png differ diff --git a/content/index.md b/content/index.md index 2e8332c..7e6dd4e 100644 --- a/content/index.md +++ b/content/index.md @@ -1,12 +1,11 @@ -# LESSON NAME - -Intro +# Python for High Performance Data Analytics :::{prereq} -- FIXME -- ... -- ... +- Basic proficiency in Python (variables, flow control, functions) +- Basic grasp of descriptive statistics (such as minimum, maximum, median, arithmetic mean...) +- Basic knowledge of NumPy +- Basic knowledge of some plotting package (Matplotlib, Seaborn, Holoviz...) ::: ```{csv-table} @@ -20,7 +19,13 @@ Intro :caption: The lesson :maxdepth: 1 -episode.md +tabular-data +interfacing-with-storage +visualisation +benchmarking +multithreading +dask + ``` ```{toctree} @@ -31,25 +36,59 @@ quick-reference guide ``` -## Learning outcomes +## What to expect from this course + +:::{discussion} + +How large are the datasets you are working with? -FIXME +::: + +Both for classical machine/deep learning and (generative) AI, the amount of +data needed to train ever-growing models is becoming bigger and bigger. 
+Moreover, great strides in both hardware and software development for high +performance computing (HPC) applications allow for large scale computations +that were not possible before. +This course focuses on high performance data analytics (HPDA). The data +can come from simulations or experiments (or just generally available +datasets), and the goal is to pre-process, analyse and visualise it. +The lesson introduces some of the modern Python stack for data analytics, +dealing with packages such as Pandas, Polars, multithreading +and Dask, as well as Streamlit for large-scale data visualisations. -This material is for ... +## Learning outcomes -By the end of this module, learners should: +This lesson provides a broad overview of methods to work with large datasets +using tools and libraries from the Python ecosystem. Since this field is +fairly extensive, we will try to expose just enough details on each topic +for you to get a good idea of the picture and an understanding of what +combination of tools and libraries will work well for your particular use +case. -- ... -- ... +Specifically, this lesson covers: + +- Tools for efficiently storing data and writing/reading it to/from disk +- Interfacing with databases and object storage solutions +- Main libraries to work with arrays and tabular data +- Performance monitoring and benchmarking +- Workload parallelisation: threads and Dask ## See also :::{admonition} Credit :class: warning -FIXME +Don't forget to check out additional course materials from the +[Data carpentry](https://datacarpentry.org/lessons/), such as: + +- [Data Analysis and Visualization in Python for Ecologists](https://datacarpentry.github.io/python-ecology-lesson/) +- [10 minutes to pandas](https://pandas.pydata.org/pandas-docs/version/1.3/user_guide/10min.html#minutes-to-pandas) +- [Modern Pandas (blog series by Tom Augspurger)](https://tomaugspurger.net/posts/modern-1-intro/) -Don't forget to check out additional course materials from ... 
+Moreover, the Polars [documentation](https://docs.pola.rs/) and +[Awesome data science with Python](https://github.com/r0f1/datascience) +are valuable resources, as well as +[PythonSpeed](https://pythonspeed.com/datascience/#pandas). ::: diff --git a/content/interfacing-with-storage.md b/content/interfacing-with-storage.md new file mode 100644 index 0000000..5031f59 --- /dev/null +++ b/content/interfacing-with-storage.md @@ -0,0 +1,80 @@ +# Storage & serialisation backends + +:::{questions} + +- What syntax is used to make a lesson? +- How do you structure a lesson effectively for teaching? +- `questions` are at the top of a lesson and provide a starting + point for what you might learn. It is usually a bulleted list. +::: + +:::{objectives} + +- Show a complete lesson page with all of the most common + structures. +- ... + +This is also a holdover from the carpentries-style. It could +usually be left off. +::: + +The introduction should be a high level overview of what is on the +page and why it is interesting. + +The lines below (only in the source) will set the default highlighting +language for the entire page. + +:::{highlight} python +::: + +## Section + +A section. + +:::{discussion} +Discuss the following. + +- A discussion section +- Another discussion topic +::: + +## Section + +``` +print("hello world") +# This uses the default highlighting language +``` + +```python +print("hello world) +``` + +## Exercises: description + +:::{exercise} Exercise Topic-1: imperative description of exercise +Exercise text here. +::: + +:::{solution} +Solution text here +::: + +## Summary + +A Summary of what you learned and why it might be useful. Maybe a +hint of what comes next. + +## See also + +- Other relevant links +- Other link + +:::{keypoints} + +- What the learner should take away +- point 2 +- ... + +This is another holdover from the carpentries style. This perhaps +is better done in a "summary" section. 
+::: diff --git a/content/multithreading.md b/content/multithreading.md new file mode 100644 index 0000000..b835806 --- /dev/null +++ b/content/multithreading.md @@ -0,0 +1,80 @@ +# Multithreading + +:::{questions} + +- What syntax is used to make a lesson? +- How do you structure a lesson effectively for teaching? +- `questions` are at the top of a lesson and provide a starting + point for what you might learn. It is usually a bulleted list. +::: + +:::{objectives} + +- Show a complete lesson page with all of the most common + structures. +- ... + +This is also a holdover from the carpentries-style. It could +usually be left off. +::: + +The introduction should be a high level overview of what is on the +page and why it is interesting. + +The lines below (only in the source) will set the default highlighting +language for the entire page. + +:::{highlight} python +::: + +## Section + +A section. + +:::{discussion} +Discuss the following. + +- A discussion section +- Another discussion topic +::: + +## Section + +``` +print("hello world") +# This uses the default highlighting language +``` + +```python +print("hello world) +``` + +## Exercises: description + +:::{exercise} Exercise Topic-1: imperative description of exercise +Exercise text here. +::: + +:::{solution} +Solution text here +::: + +## Summary + +A Summary of what you learned and why it might be useful. Maybe a +hint of what comes next. + +## See also + +- Other relevant links +- Other link + +:::{keypoints} + +- What the learner should take away +- point 2 +- ... + +This is another holdover from the carpentries style. This perhaps +is better done in a "summary" section. +::: diff --git a/content/tabular-data.md b/content/tabular-data.md new file mode 100644 index 0000000..f81be14 --- /dev/null +++ b/content/tabular-data.md @@ -0,0 +1,999 @@ +# Tabular data (aka Dataframes) + +:::{questions} + +- What are series and dataframes? +- What do we mean by tidy and untidy data? 
+- What packages are available in Python to handle dataframes?
+
+:::
+
+:::{objectives}
+
+- Learn how to manipulate dataframes in Pandas
+- Lazy and eager dataframes in Polars
+
+:::
+
+:::{highlight} python
+:::
+
+This episode will give an introduction to the concepts of *Series* and *DataFrame*
+and how they can be manipulated using different Python packages.
+
+## Series and dataframes
+
+A collection of observations (e.g. a time series or simply a set of observations
+of a feature of a phenomenon) can be represented by a homogeneous vector, i.e.
+an array where all the elements are of the same type. This is known as a *Series*
+in many frameworks. Several series (of different types) can be used as columns of
+a tabular structure called a *DataFrame*, as depicted in the figure below.
+
+![Structure of dataframe](01_table_dataframe.svg)
+
+### Tidy vs untidy dataframes
+
+Let us look at the following two dataframes:
+
+::::{tabs}
+:::{group-tab} Untidy format
+
+```{csv-table}
+:delim: ;
+# ; Runner ; 400 ; 800 ; 1200 ; 1500
+0 ; Runner 1 ; 64 ; 128 ; 192 ; 240
+1 ; Runner 2 ; 80 ; 160 ; 240 ; 300
+2 ; Runner 3 ; 96 ; 192 ; 288 ; 360
+```
+
+:::
+
+:::{group-tab} Tidy format
+
+```{csv-table}
+:delim: ,
+
+#, Runner, distance, time
+0, Runner 1, 400, 64
+1, Runner 2, 400, 80
+2, Runner 3, 400, 96
+3, Runner 1, 800, 128
+4, Runner 2, 800, 160
+5, Runner 3, 800, 192
+6, Runner 1, 1200, 192
+7, Runner 2, 1200, 240
+8, Runner 3, 1200, 288
+9, Runner 1, 1500, 240
+10, Runner 2, 1500, 300
+11, Runner 3, 1500, 360
+```
+
+:::
+
+::::
+
+Most tabular data is either in a tidy format or an untidy format (some people
+refer to them as the long format and the wide format). The main differences are
+summarised below:
+
+- In untidy (wide) format, each row represents an observation consisting of
+multiple variables and each variable has its own column. This is intuitive and
+easy for us to understand and make comparisons across different variables,
+calculate statistics, etc.
+- In tidy (long) format, i.e. column-oriented format, each row represents only
+one variable of the observation, and can be considered “computer readable”.
+When it comes to data analysis using Pandas, the tidy format is recommended:
+- Each column can be stored as a vector and this not only saves memory but also
+allows for vectorized calculations which are much faster.
+- It’s easier to filter, group, join and aggregate the data.
+
+Imagine, for example, that you would like to compute the speed as
+`speed=distance/time`. The untidy format would make this much clunkier, as:
+
+- The distances are encoded as column names, not as data points (rows)
+- The speed would have to be stored in a new dataframe since it would not
+fit in that data structure.
+
+In comparison, in a tidy dataframe, this computation would be a simple
+operation between two columns.
+
+:::{tip}
+
+Recovering a wide dataframe from a tidy one is commonly referred to as
+*pivoting*. Most dataframe libraries provide a `pivot()` or
+`pivot_table()` function.
+
+:::
+
+## Pandas & Polars
+
+Historically, [Pandas](https://pandas.pydata.org/) has been the go-to package
+to handle dataframes in Python. It is based on NumPy (each column is a NumPy
+vector) and has been the traditional workhorse for tabular data, with a stable
+API and a large ecosystem built around it, including the [Seaborn](https://seaborn.pydata.org/)
+statistical plotting framework.
+More recently, [Polars](https://docs.pola.rs/) was introduced as a more modern
+and faster alternative to handle dataframes. It is written in Rust and supports
+out-of-core evaluation out of the box (i.e. it does not need to load the whole
+dataset in memory), lazy evaluation of queries, and automatically uses multiple
+threads. Moreover, experimental GPU support is available through
+[cuDF](https://docs.rapids.ai/api/cudf/stable/).
In the remainder of this +episode, the [NYC taxi](https://www.nyc.gov/site/tlc/about/tlc-trip-record-data.page) +will be used to showcase how datasets can be accessed, summarised and manipulated +in both Pandas and Polars. The dataset can be download in [Parquet](https://parquet.apache.org/) +format from the link above (the file for the month of January was used in this +case). The dataset contains information about taxi trips performed in New York, +such as the ID of the vendor, the total fare, pickup and drop-off time and +location (expressed as an ID), type of payment, whether additional fees were +charged and more. + +### Opening a dataset + +Assuming the file is called `yellow_tripdata_2025-01.parquet`, the dataset can be +opened as: + +::::{tabs} + +:::{group-tab} Pandas + +```python +import pandas as pd +df = pd.read_parquet("yellow_tripdata_2025-01.parquet") +``` + +::: + +:::{group-tab} Polars + +```python +import polars as pl +df = pl.read_parquet("yellow_tripdata_2025-01.parquet") +``` + +::: +:::: + +### Description and summarisation + +We can get a first understanding of the contents of a dataframe by printing +the first few lines, the "schema" (i.e. the number and type of each column) +and summary statistics as follows: + +:::::{tabs} + +::::{group-tab} Pandas + +```python +df.head() +``` + +:::{exercise} Output +:class: dropdown + +``` + VendorID tpep_pickup_datetime tpep_dropoff_datetime ... congestion_surcharge Airport_fee cbd_congestion_fee +0 1 2025-01-01 00:18:38 2025-01-01 00:26:59 ... 2.5 0.0 0.0 +1 1 2025-01-01 00:32:40 2025-01-01 00:35:13 ... 2.5 0.0 0.0 +2 1 2025-01-01 00:44:04 2025-01-01 00:46:01 ... 2.5 0.0 0.0 +3 2 2025-01-01 00:14:27 2025-01-01 00:20:01 ... 0.0 0.0 0.0 +4 2 2025-01-01 00:21:34 2025-01-01 00:25:06 ... 
0.0 0.0 0.0 +``` + +::: + +```python +df.info() +``` + +:::{exercise} Output +:class: dropdown + +``` +RangeIndex: 3475226 entries, 0 to 3475225 +Data columns (total 20 columns): + # Column Dtype +--- ------ ----- + 0 VendorID int32 + 1 tpep_pickup_datetime datetime64[us] + 2 tpep_dropoff_datetime datetime64[us] + 3 passenger_count float64 + 4 trip_distance float64 + 5 RatecodeID float64 + 6 store_and_fwd_flag object + 7 PULocationID int32 + 8 DOLocationID int32 + 9 payment_type int64 + 10 fare_amount float64 + 11 extra float64 + 12 mta_tax float64 + 13 tip_amount float64 + 14 tolls_amount float64 + 15 improvement_surcharge float64 + 16 total_amount float64 + 17 congestion_surcharge float64 + 18 Airport_fee float64 + 19 cbd_congestion_fee float64 +dtypes: datetime64[us](2), float64(13), int32(3), int64(1), object(1) +memory usage: 490.5+ MB +``` + +::: + +```python +df.describe() +``` + +:::{exercise} Output +:class: dropdown + +``` + VendorID tpep_pickup_datetime tpep_dropoff_datetime ... congestion_surcharge Airport_fee cbd_congestion_fee +count 3.475226e+06 3475226 3475226 ... 2.935077e+06 2.935077e+06 3.475226e+06 +mean 1.785428e+00 2025-01-17 11:02:55.910964 2025-01-17 11:17:56.997901 ... 2.225237e+00 1.239111e-01 4.834093e-01 +min 1.000000e+00 2024-12-31 20:47:55 2024-12-18 07:52:40 ... -2.500000e+00 -1.750000e+00 -7.500000e-01 +25% 2.000000e+00 2025-01-10 07:59:01 2025-01-10 08:15:29.500000 ... 2.500000e+00 0.000000e+00 0.000000e+00 +50% 2.000000e+00 2025-01-17 15:41:33 2025-01-17 15:59:34 ... 2.500000e+00 0.000000e+00 7.500000e-01 +75% 2.000000e+00 2025-01-24 19:34:06 2025-01-24 19:48:31 ... 2.500000e+00 0.000000e+00 7.500000e-01 +max 7.000000e+00 2025-02-01 00:00:44 2025-02-01 23:44:11 ... 2.500000e+00 6.750000e+00 7.500000e-01 +std 4.263282e-01 NaN NaN ... 
9.039932e-01 4.725090e-01 3.619307e-01 + +[8 rows x 19 columns] +``` + +::: +:::: + +::::{group-tab} Polars + +```python +df.head() +``` + +:::{exercise} Output +:class: dropdown + +```python +shape: (5, 20) +┌──────────┬──────────────────┬──────────────────┬─────────────────┬───┬──────────────┬──────────────────┬─────────────┬─────────────────┐ +│ VendorID ┆ tpep_pickup_date ┆ tpep_dropoff_dat ┆ passenger_count ┆ … ┆ total_amount ┆ congestion_surch ┆ Airport_fee ┆ cbd_congestion_ │ +│ --- ┆ time ┆ etime ┆ --- ┆ ┆ --- ┆ arge ┆ --- ┆ fee │ +│ i32 ┆ --- ┆ --- ┆ i64 ┆ ┆ f64 ┆ --- ┆ f64 ┆ --- │ +│ ┆ datetime[μs] ┆ datetime[μs] ┆ ┆ ┆ ┆ f64 ┆ ┆ f64 │ +╞══════════╪══════════════════╪══════════════════╪═════════════════╪═══╪══════════════╪══════════════════╪═════════════╪═════════════════╡ +│ 1 ┆ 2025-01-01 ┆ 2025-01-01 ┆ 1 ┆ … ┆ 18.0 ┆ 2.5 ┆ 0.0 ┆ 0.0 │ +│ ┆ 00:18:38 ┆ 00:26:59 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ 1 ┆ 2025-01-01 ┆ 2025-01-01 ┆ 1 ┆ … ┆ 12.12 ┆ 2.5 ┆ 0.0 ┆ 0.0 │ +│ ┆ 00:32:40 ┆ 00:35:13 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ 1 ┆ 2025-01-01 ┆ 2025-01-01 ┆ 1 ┆ … ┆ 12.1 ┆ 2.5 ┆ 0.0 ┆ 0.0 │ +│ ┆ 00:44:04 ┆ 00:46:01 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ 2 ┆ 2025-01-01 ┆ 2025-01-01 ┆ 3 ┆ … ┆ 9.7 ┆ 0.0 ┆ 0.0 ┆ 0.0 │ +│ ┆ 00:14:27 ┆ 00:20:01 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ 2 ┆ 2025-01-01 ┆ 2025-01-01 ┆ 3 ┆ … ┆ 8.3 ┆ 0.0 ┆ 0.0 ┆ 0.0 │ +│ ┆ 00:21:34 ┆ 00:25:06 ┆ ┆ ┆ ┆ ┆ ┆ │ +└──────────┴──────────────────┴──────────────────┴─────────────────┴───┴──────────────┴──────────────────┴─────────────┴─────────────────┘ +``` + +::: + +```python +df.describe() +``` + +:::{exercise} Output +:class: dropdown + +``` +shape: (9, 21) +┌────────────┬────────────┬──────────────────┬─────────────────┬───┬──────────────┬─────────────────┬─────────────┬─────────────────┐ +│ statistic ┆ VendorID ┆ tpep_pickup_date ┆ tpep_dropoff_da ┆ … ┆ total_amount ┆ congestion_surc ┆ Airport_fee ┆ cbd_congestion_ │ +│ --- ┆ --- ┆ time ┆ tetime ┆ ┆ --- ┆ harge ┆ --- ┆ fee │ +│ str ┆ f64 ┆ --- ┆ --- ┆ ┆ f64 ┆ --- ┆ f64 ┆ --- │ +│ ┆ ┆ str ┆ str ┆ ┆ ┆ f64 ┆ ┆ f64 │ 
+╞════════════╪════════════╪══════════════════╪═════════════════╪═══╪══════════════╪═════════════════╪═════════════╪═════════════════╡ +│ count ┆ 3.475226e6 ┆ 3475226 ┆ 3475226 ┆ … ┆ 3.475226e6 ┆ 2.935077e6 ┆ 2.935077e6 ┆ 3.475226e6 │ +│ null_count ┆ 0.0 ┆ 0 ┆ 0 ┆ … ┆ 0.0 ┆ 540149.0 ┆ 540149.0 ┆ 0.0 │ +│ mean ┆ 1.785428 ┆ 2025-01-17 ┆ 2025-01-17 ┆ … ┆ 25.611292 ┆ 2.225237 ┆ 0.123911 ┆ 0.483409 │ +│ ┆ ┆ 11:02:55.910964 ┆ 11:17:56.997901 ┆ ┆ ┆ ┆ ┆ │ +│ std ┆ 0.426328 ┆ null ┆ null ┆ … ┆ 463.658478 ┆ 0.903993 ┆ 0.472509 ┆ 0.361931 │ +│ min ┆ 1.0 ┆ 2024-12-31 ┆ 2024-12-18 ┆ … ┆ -901.0 ┆ -2.5 ┆ -1.75 ┆ -0.75 │ +│ ┆ ┆ 20:47:55 ┆ 07:52:40 ┆ ┆ ┆ ┆ ┆ │ +│ 25% ┆ 2.0 ┆ 2025-01-10 ┆ 2025-01-10 ┆ … ┆ 15.2 ┆ 2.5 ┆ 0.0 ┆ 0.0 │ +│ ┆ ┆ 07:59:01 ┆ 08:15:29 ┆ ┆ ┆ ┆ ┆ │ +│ 50% ┆ 2.0 ┆ 2025-01-17 ┆ 2025-01-17 ┆ … ┆ 19.95 ┆ 2.5 ┆ 0.0 ┆ 0.75 │ +│ ┆ ┆ 15:41:34 ┆ 15:59:34 ┆ ┆ ┆ ┆ ┆ │ +│ 75% ┆ 2.0 ┆ 2025-01-24 ┆ 2025-01-24 ┆ … ┆ 27.78 ┆ 2.5 ┆ 0.0 ┆ 0.75 │ +│ ┆ ┆ 19:34:06 ┆ 19:48:31 ┆ ┆ ┆ ┆ ┆ │ +│ max ┆ 7.0 ┆ 2025-02-01 ┆ 2025-02-01 ┆ … ┆ 863380.37 ┆ 2.5 ┆ 6.75 ┆ 0.75 │ +│ ┆ ┆ 00:00:44 ┆ 23:44:11 ┆ ┆ ┆ ┆ ┆ │ +└────────────┴────────────┴──────────────────┴─────────────────┴───┴──────────────┴─────────────────┴─────────────┴─────────────────┘ +``` + +::: + +:::: + +::::: + +### Indexing + +We can index data in the dataframe as follows: + +:::::{tabs} + +::::{group-tab} Pandas + +```python +# With this we can select a column +df['VendorID'] # Could also be df.VendorID +``` + +:::{exercise} Output +:class: dropdown + +``` +0 1 +1 1 +2 1 +3 2 +4 2 + .. 
+3475221 2 +3475222 2 +3475223 2 +3475224 2 +3475225 2 +``` + +::: + +```python +# Get a row +df.iloc[1000,:] +``` + +:::{exercise} Output +:class: dropdown + +``` +VendorID 2 +tpep_pickup_datetime 2025-01-01 00:08:06 +tpep_dropoff_datetime 2025-01-01 00:16:20 +passenger_count 4.0 +trip_distance 1.53 +RatecodeID 1.0 +store_and_fwd_flag N +PULocationID 114 +DOLocationID 90 +payment_type 1 +fare_amount 10.0 +extra 1.0 +mta_tax 0.5 +tip_amount 2.25 +tolls_amount 0.0 +improvement_surcharge 1.0 +total_amount 17.25 +congestion_surcharge 2.5 +Airport_fee 0.0 +cbd_congestion_fee 0.0 +Name: 1000, dtype: object +>>> df.iloc[1000,:] +VendorID 2 +tpep_pickup_datetime 2025-01-01 00:08:06 +tpep_dropoff_datetime 2025-01-01 00:16:20 +passenger_count 4.0 +trip_distance 1.53 +RatecodeID 1.0 +store_and_fwd_flag N +PULocationID 114 +DOLocationID 90 +payment_type 1 +fare_amount 10.0 +extra 1.0 +mta_tax 0.5 +tip_amount 2.25 +tolls_amount 0.0 +improvement_surcharge 1.0 +total_amount 17.25 +congestion_surcharge 2.5 +Airport_fee 0.0 +cbd_congestion_fee 0.0 +``` + +::: + +:::: + +::::{group-tab} Polars + +```python +df["VendorID"] # A more Polars-y idiom is to use df.select(["VendorID"]) +``` + +:::{exercise} Output +:class: dropdown + +``` +shape: (3_475_226,) +Series: 'VendorID' [i32] +[ + 1 + 1 + 1 + 2 + 2 + … + 2 + 2 + 2 + 2 + 2 +] +``` + +::: + +```python +df[1000][:] +``` + +:::{exercise} Output +:class: dropdown + +``` +┌──────────┬─────────────┬─────────────┬────────────┬───┬────────────┬────────────┬────────────┬────────────┐ +│ VendorID ┆ tpep_pickup ┆ tpep_dropof ┆ passenger_ ┆ … ┆ total_amou ┆ congestion ┆ Airport_fe ┆ cbd_conges │ +│ --- ┆ _datetime ┆ f_datetime ┆ count ┆ ┆ nt ┆ _surcharge ┆ e ┆ tion_fee │ +│ i32 ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │ +│ ┆ datetime[μs ┆ datetime[μs ┆ i64 ┆ ┆ f64 ┆ f64 ┆ f64 ┆ f64 │ +│ ┆ ] ┆ ] ┆ ┆ ┆ ┆ ┆ ┆ │ +╞══════════╪═════════════╪═════════════╪════════════╪═══╪════════════╪════════════╪════════════╪════════════╡ +│ 2 ┆ 2025-01-01 ┆ 
2025-01-01 ┆ 4 ┆ … ┆ 17.25 ┆ 2.5 ┆ 0.0 ┆ 0.0 │ +│ ┆ 00:08:06 ┆ 00:16:20 ┆ ┆ ┆ ┆ ┆ ┆ │ +└──────────┴─────────────┴─────────────┴────────────┴───┴────────────┴────────────┴────────────┴────────────┘ +``` + +::: + +:::: +::::: + +In both cases, a similar syntax can be used to do in-place modification (e.g. `df[row][column]=...`). +Please note that this kind of replacement carries a big performance penalty, +which is designed to do column-wide operations with minimal overhead. This is +commonly achieved through the [expression API](https://docs.pola.rs/user-guide/concepts/expressions-and-contexts/), as detailed in the next section. + +### Common workflows + +It is quite common to stratify (i.e. divide the samples into a number of groups +based on categorical variables) to produce descriptive statistics (i.e. +statistics that provide a summary of the samples and do not aim to predict +anything regarding the population it comes from). This is commonly achieved +through a `group-by` workflow, where the following happens: + +- Splitting: data is partitioned into different groups based on some criterion +- Applying: applying a function/performing a calculation to each group +- Combining: assembling a dataframe (of potentially any size) with the results. + +This is type of workflow is represented below. + +:::{figure-md} +![split-apply-combine](groupby.png) + +Source: [Earth and environmental data science](https://earth-env-data-science.github.io/intro.html) +::: + +As an example, let us try to to compute the total fare for each hour, split +by payment type. 
+ +:::::{tabs} + +::::{group-tab} Pandas + +```python +#First let us extract the hour from the tpep_pickup_datetime column +df["hour"] = df['tpep_pickup_datetime'].dt.hour + +hourly_fare = ( + df.groupby(['hour', 'payment_type'], observed=False)['fare_amount'] + .sum() + .reset_index() + .sort_values(['hour', 'payment_type']) +) +``` + +:::{exercise} Output +:class: dropdown + +``` + hour payment_type fare_amount +0 0 0 352227.86 +1 0 1 1088201.12 +2 0 2 156546.07 +3 0 3 3537.91 +4 0 4 3941.24 +.. ... ... ... +116 23 0 534063.76 +117 23 1 1618143.37 +118 23 2 219991.22 +119 23 3 4765.54 +120 23 4 3293.61 +``` + +::: + +The `groupby` statement is used to stratify the `fare_amount` column by +hour and payment type. Then the amounts per hour and type get summed and +sorted according to time and payment type. + +:::: + +::::{group-tab} Polars + +```python +#First, let us extract the hour from the tpep_pickup_datetime column +df = df.with_columns(pl.col('tpep_pickup_datetime').dt.hour().alias('hour')) + +hourly_fare = ( + df.group_by(['hour', 'payment_type']) + .agg(pl.sum('fare_amount').alias('total_fare')) + .sort(['hour', 'payment_type']) +) +``` + +:::{exercise} Output +:class: dropdown + +``` +┌──────┬──────────────┬────────────┐ +│ hour ┆ payment_type ┆ total_fare │ +│ --- ┆ --- ┆ --- │ +│ i8 ┆ i64 ┆ f64 │ +╞══════╪══════════════╪════════════╡ +│ 0 ┆ 0 ┆ 352227.86 │ +│ 0 ┆ 1 ┆ 1.0882e6 │ +│ 0 ┆ 2 ┆ 156546.07 │ +│ 0 ┆ 3 ┆ 3537.91 │ +│ 0 ┆ 4 ┆ 3941.24 │ +│ … ┆ … ┆ … │ +│ 23 ┆ 0 ┆ 534063.76 │ +│ 23 ┆ 1 ┆ 1.6181e6 │ +│ 23 ┆ 2 ┆ 219991.22 │ +│ 23 ┆ 3 ┆ 4765.54 │ +│ 23 ┆ 4 ┆ 3293.61 │ +└──────┴──────────────┴────────────┘ +``` + +::: + +The `group_by` statement is used to stratify by hour and payment type, +followed by an aggregated sum of the `fare_amount` column and a sort. +Notice how the syntax has more of a functional programming flavour to it +(`pl.col`, `pl.sum` as pure functions). This will be clarified further in +the next section. 
Also note that Polars by default spreads the workload
+over multiple threads.
+
+::::
+
+:::::
+
+## Idiomatic Polars
+
+Polars introduces a few variations to dataset operations compared to the
+traditional Pandas approach. In particular, a domain-specific language
+(DSL) was developed, where *expressions* are written to represent dataset
+operations and *contexts* provide the environment where they produce a
+result.
+
+### Expressions
+
+Let's say that we created a `trip_duration_sec` column in our NYC cab database
+and, given the `trip_distance` column, we want to compute the average speed.
+In Polars, this can be achieved with:
+
+```python
+pl.col('trip_distance') / pl.col('trip_duration_sec')
+```
+
+This is a lazy representation of an operation we want to perform, which can
+be further manipulated or just printed. For it to actually produce data, a
+*context* is needed.
+
+### Contexts
+
+The same Polars expression can produce different results depending on the
+context where it is used. Four common contexts include:
+
+- `select`
+- `with_columns`
+- `filter`
+- `group_by`
+
+Both `select` and `with_columns` can produce new columns, which may be
+aggregations, combinations of other columns, or literals. The difference
+between the two is that `select` only includes the columns contained in its
+input expression, whereas `with_columns` returns a new dataframe which
+contains all the columns from the original dataframe and the new ones created
+by the expression.
To exemplify, using our earlier example of computing the +average speed during a trip, using `select` would yield a single column, +whereas `with_columns` would return the original dataframe with an additional +column called `trip distance`: + +```python +df.select(pl.col('trip_distance')/pl.col('trip_duration_sec')*3600) +shape: (3_475_226, 1) +┌───────────────┐ +│ trip_distance │ +│ --- │ +│ f64 │ +╞═══════════════╡ +│ 11.497006 │ +│ 11.764706 │ +│ 18.461538 │ +│ 5.60479 │ +│ 11.207547 │ +│ … │ +│ 13.68899 │ +│ 19.42398 │ +│ 9.879418 │ +│ 9.339901 │ +│ 12.781395 │ +└───────────────┘ +``` + +```python +df.with_columns((pl.col('trip_distance')/pl.col('trip_duration_sec')*3600).alias("avg_sp +eed_mph")) +shape: (3_475_226, 22) +┌──────────┬──────────┬──────────┬──────────┬───┬──────────┬──────────┬──────────┬──────────┐ +│ VendorID ┆ tpep_pic ┆ tpep_dro ┆ passenge ┆ … ┆ Airport_ ┆ cbd_cong ┆ trip_dur ┆ avg_spee │ +│ --- ┆ kup_date ┆ poff_dat ┆ r_count ┆ ┆ fee ┆ estion_f ┆ ation_se ┆ d_mph │ +│ i32 ┆ time ┆ etime ┆ --- ┆ ┆ --- ┆ ee ┆ c ┆ --- │ +│ ┆ --- ┆ --- ┆ i64 ┆ ┆ f64 ┆ --- ┆ --- ┆ f64 │ +│ ┆ datetime ┆ datetime ┆ ┆ ┆ ┆ f64 ┆ i64 ┆ │ +│ ┆ [μs] ┆ [μs] ┆ ┆ ┆ ┆ ┆ ┆ │ +╞══════════╪══════════╪══════════╪══════════╪═══╪══════════╪══════════╪══════════╪══════════╡ +│ 1 ┆ 2025-01- ┆ 2025-01- ┆ 1 ┆ … ┆ 0.0 ┆ 0.0 ┆ 501 ┆ 11.49700 │ +│ ┆ 01 ┆ 01 ┆ ┆ ┆ ┆ ┆ ┆ 6 │ +│ ┆ 00:18:38 ┆ 00:26:59 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ 1 ┆ 2025-01- ┆ 2025-01- ┆ 1 ┆ … ┆ 0.0 ┆ 0.0 ┆ 153 ┆ 11.76470 │ +│ ┆ 01 ┆ 01 ┆ ┆ ┆ ┆ ┆ ┆ 6 │ +│ ┆ 00:32:40 ┆ 00:35:13 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ 1 ┆ 2025-01- ┆ 2025-01- ┆ 1 ┆ … ┆ 0.0 ┆ 0.0 ┆ 117 ┆ 18.46153 │ +│ ┆ 01 ┆ 01 ┆ ┆ ┆ ┆ ┆ ┆ 8 │ +│ ┆ 00:44:04 ┆ 00:46:01 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ 2 ┆ 2025-01- ┆ 2025-01- ┆ 3 ┆ … ┆ 0.0 ┆ 0.0 ┆ 334 ┆ 5.60479 │ +│ ┆ 01 ┆ 01 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ ┆ 00:14:27 ┆ 00:20:01 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ 2 ┆ 2025-01- ┆ 2025-01- ┆ 3 ┆ … ┆ 0.0 ┆ 0.0 ┆ 212 ┆ 11.20754 │ +│ ┆ 01 ┆ 01 ┆ ┆ ┆ ┆ ┆ ┆ 7 │ +│ ┆ 00:21:34 ┆ 00:25:06 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │ +│ 2 ┆ 
2025-01- ┆ 2025-01- ┆ null ┆ … ┆ null ┆ 0.75 ┆ 881 ┆ 13.68899 │ +│ ┆ 31 ┆ 31 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ ┆ 23:01:48 ┆ 23:16:29 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ 2 ┆ 2025-01- ┆ 2025-02- ┆ null ┆ … ┆ null ┆ 0.75 ┆ 1618 ┆ 19.42398 │ +│ ┆ 31 ┆ 01 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ ┆ 23:50:29 ┆ 00:17:27 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ 2 ┆ 2025-01- ┆ 2025-01- ┆ null ┆ … ┆ null ┆ 0.75 ┆ 962 ┆ 9.879418 │ +│ ┆ 31 ┆ 31 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ ┆ 23:26:59 ┆ 23:43:01 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ 2 ┆ 2025-01- ┆ 2025-01- ┆ null ┆ … ┆ null ┆ 0.75 ┆ 1218 ┆ 9.339901 │ +│ ┆ 31 ┆ 31 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ ┆ 23:14:34 ┆ 23:34:52 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ 2 ┆ 2025-01- ┆ 2025-02- ┆ null ┆ … ┆ null ┆ 0.0 ┆ 645 ┆ 12.78139 │ +│ ┆ 31 ┆ 01 ┆ ┆ ┆ ┆ ┆ ┆ 5 │ +│ ┆ 23:56:42 ┆ 00:07:27 ┆ ┆ ┆ ┆ ┆ ┆ │ +└──────────┴──────────┴──────────┴──────────┴───┴──────────┴──────────┴──────────┴──────────┘ +``` + +The `filter` context filters the rows of a dataframe based on one (or more) +expressions which evaluate to a Boolean, e.g. + +```python +df.filter(pl.col('avg_speed_mph') < 1) +shape: (104_410, 22) +┌──────────┬──────────┬──────────┬──────────┬───┬──────────┬──────────┬──────────┬──────────┐ +│ VendorID ┆ tpep_pic ┆ tpep_dro ┆ passenge ┆ … ┆ Airport_ ┆ cbd_cong ┆ trip_dur ┆ avg_spee │ +│ --- ┆ kup_date ┆ poff_dat ┆ r_count ┆ ┆ fee ┆ estion_f ┆ ation_se ┆ d_mph │ +│ i32 ┆ time ┆ etime ┆ --- ┆ ┆ --- ┆ ee ┆ c ┆ --- │ +│ ┆ --- ┆ --- ┆ i64 ┆ ┆ f64 ┆ --- ┆ --- ┆ f64 │ +│ ┆ datetime ┆ datetime ┆ ┆ ┆ ┆ f64 ┆ i64 ┆ │ +│ ┆ [μs] ┆ [μs] ┆ ┆ ┆ ┆ ┆ ┆ │ +╞══════════╪══════════╪══════════╪══════════╪═══╪══════════╪══════════╪══════════╪══════════╡ +│ 2 ┆ 2025-01- ┆ 2025-01- ┆ 1 ┆ … ┆ 0.0 ┆ 0.0 ┆ 10 ┆ 0.0 │ +│ ┆ 01 ┆ 01 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ ┆ 00:37:43 ┆ 00:37:53 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ 2 ┆ 2025-01- ┆ 2025-01- ┆ 3 ┆ … ┆ 0.0 ┆ 0.0 ┆ 8 ┆ 0.0 │ +│ ┆ 01 ┆ 01 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ ┆ 00:57:08 ┆ 00:57:16 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ 1 ┆ 2025-01- ┆ 2025-01- ┆ 1 ┆ … ┆ 0.0 ┆ 0.0 ┆ 1910 ┆ 0.0 │ +│ ┆ 01 ┆ 01 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ ┆ 00:27:40 ┆ 00:59:30 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ 2 ┆ 2025-01- ┆ 2025-01- ┆ 4 ┆ … ┆ 0.0 ┆ 0.0 ┆ 5 ┆ 0.0 │ +│ ┆ 01 ┆ 01 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ ┆ 
00:56:49 ┆ 00:56:54 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ 1 ┆ 2025-01- ┆ 2025-01- ┆ 0 ┆ … ┆ 0.0 ┆ 0.0 ┆ 2 ┆ 0.0 │ +│ ┆ 01 ┆ 01 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ ┆ 00:42:42 ┆ 00:42:44 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │ +│ 1 ┆ 2025-01- ┆ 2025-02- ┆ null ┆ … ┆ null ┆ 0.75 ┆ 266 ┆ 0.0 │ +│ ┆ 31 ┆ 01 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ ┆ 23:59:17 ┆ 00:03:43 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ 1 ┆ 2025-01- ┆ 2025-01- ┆ null ┆ … ┆ null ┆ 0.75 ┆ 1100 ┆ 0.0 │ +│ ┆ 31 ┆ 31 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ ┆ 23:17:38 ┆ 23:35:58 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ 2 ┆ 2025-01- ┆ 2025-01- ┆ null ┆ … ┆ null ┆ 0.0 ┆ 161 ┆ 0.0 │ +│ ┆ 31 ┆ 31 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ ┆ 23:39:25 ┆ 23:42:06 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ 1 ┆ 2025-01- ┆ 2025-01- ┆ null ┆ … ┆ null ┆ 0.75 ┆ 24 ┆ 0.0 │ +│ ┆ 31 ┆ 31 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ ┆ 23:30:42 ┆ 23:31:06 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ 1 ┆ 2025-01- ┆ 2025-01- ┆ null ┆ … ┆ null ┆ 0.75 ┆ 1556 ┆ 0.0 │ +│ ┆ 31 ┆ 31 ┆ ┆ ┆ ┆ ┆ ┆ │ +│ ┆ 23:10:25 ┆ 23:36:21 ┆ ┆ ┆ ┆ ┆ ┆ │ +└──────────┴──────────┴──────────┴──────────┴───┴──────────┴──────────┴──────────┴──────────┘ +``` + +The `group_by` context behaves like its Pandas counterpart. + +### Transformations + +A `join` operation combines columns from one or more dataframes into a new +dataframe. There are different joining strategies, which influence how columns +are combined and what rows are included in the final set. A common type is the +*equi* join, where rows are matched by a key expression. Let us clarify this +with an example. The `df` dataframe does not include specific coordinates for +each pickup and drop-off, rather only a `PULocationID` and a `DOLocationID`. 
+
+There is a `taxi_zones_xy.csv` file that contains, for each `LocationID`, the
+longitude (X) and latitude (Y) of each location, as well as the name of zone
+and borough:
+
+```python
+
+lookup_df = pl.read_csv('taxi_zones_xy.csv', has_header=True)
+lookup_df.head()
+┌────────────┬────────────┬───────────┬─────────────────────────┬───────────────┐
+│ LocationID ┆ X          ┆ Y         ┆ zone                    ┆ borough       │
+│ ---        ┆ ---        ┆ ---       ┆ ---                     ┆ ---           │
+│ i64        ┆ f64        ┆ f64       ┆ str                     ┆ str           │
+╞════════════╪════════════╪═══════════╪═════════════════════════╪═══════════════╡
+│ 1          ┆ -74.176786 ┆ 40.689516 ┆ Newark Airport          ┆ EWR           │
+│ 2          ┆ -73.826126 ┆ 40.625724 ┆ Jamaica Bay             ┆ Queens        │
+│ 3          ┆ -73.849479 ┆ 40.865888 ┆ Allerton/Pelham Gardens ┆ Bronx         │
+│ 4          ┆ -73.977023 ┆ 40.724152 ┆ Alphabet City           ┆ Manhattan     │
+│ 5          ┆ -74.18993  ┆ 40.55034  ┆ Arden Heights           ┆ Staten Island │
+└────────────┴────────────┴───────────┴─────────────────────────┴───────────────┘
+```
+
+This can be used to append these columns to the original df to have some form
+of geographical data as follows (e.g. for the `PULocationID`):
+
+```python
+df = df.join(lookup_df, left_on='PULocationID', right_on='LocationID', how='left'
+, suffix='_pickup')
+```
+
+In the line above, `left_on` is used to indicate the *key* in the original
+dataframe, `right_on` is used to specify the *key* in the `lookup_df` dataframe,
+`how='left'` means that the columns from the second dataframe will be added to
+the first (and not the other way around) and `suffix` is what will be added to
+the names of the joined columns (i.e., df will contain columns called `X_pickup`,
+`Y_pickup`, `zone_pickup` and `borough_pickup`). More information on join
+operations can be found [here](https://docs.pola.rs/user-guide/transformations/joins/).
+
+## Exercises
+
+::::{exercise} Joining geographical data
+We have already seen how to add actual latitude and longitude for the pickups.
+Now do the same for the drop-offs! 
+
+
+:::{solution}
+
+```python
+df = df.join(lookup_df, left_on='DOLocationID', right_on='LocationID', how='left'
+, suffix='_dropoff')
+```
+
+:::
+
+::::
+
+::::{exercise} Feature engineering: enriching the dataset
+We want to understand a bit more of the traffic in the city by creating
+new features (i.e. columns), in particular:
+
+- Split the pickup datetime into hour, minute, day of the week and month
+to identify daily, weekly and monthly trends
+- Compute the average speed as an indicator of congestion (low speed ->
+traffic jam)
+- Stratify the trip distance and fare by zone to identify how expensive
+different zones are.
+
+Below is a skeleton of the code, where some lines have been blanked out
+for you to fill (marked with `TODO:...`)
+
+```python
+import polars as pl
+raw_df = pl.read_parquet('yellow_tripdata_2025-01.parquet')
+df = raw_df.with_columns([
+    pl.col("tpep_pickup_datetime").dt.hour().alias("pickup_hour"),
+    #TODO: do this for the minute
+    pl.col("tpep_pickup_datetime").dt.weekday().alias("pickup_dow"), # Mon=1 … Sun=7
+    pl.col("tpep_pickup_datetime").dt.month().alias("pickup_month"),
+    # Trip duration in seconds
+    (pl.col("tpep_dropoff_datetime") - pl.col("tpep_pickup_datetime"))
+    .dt.total_seconds()
+    .alias("trip_duration_sec"),
+])
+
+df = df.with_columns(
+    #TODO: add expression for average velocity here
+    .fill_nan(None) # protect against div‑by‑zero
+    .alias("avg_speed_mph")
+)
+
+# Compute per‑pickup‑zone statistics once
+zone_stats = (
+    df.group_by("PULocationID")
+    .agg([
+        pl.mean("fare_amount").alias("zone_avg_fare"),
+        #TODO: do the same for the trip distance here
+        pl.count().alias("zone_trip_cnt"),
+    ])
+    .rename({"PULocationID": "pickup_zone_id"}) # avoid name clash later
+)
+
+# Join those stats back onto the original rows
+df = df.join(zone_stats, left_on="PULocationID", right_on="pickup_zone_id", how="left")
+```
+
+As we saw in the *Transformations* section above, the main role of `join` here
+is to "spread" the `zone_stats` 
over all the rides in the original dataframe
+(i.e. write the `zone_avg_fare` on each ride in `df`). `join` has its roots
+in relational databases, where different tables can be merged based on a
+common column.
+
+:::{solution}
+
+```python
+import polars as pl
+raw_df = pl.read_parquet('yellow_tripdata_2025-01.parquet')
+df = raw_df.with_columns([
+    pl.col("tpep_pickup_datetime").dt.hour().alias("pickup_hour"),
+    pl.col("tpep_pickup_datetime").dt.minute().alias("pickup_minute"),
+    pl.col("tpep_pickup_datetime").dt.weekday().alias("pickup_dow"), # Mon=1 … Sun=7
+    pl.col("tpep_pickup_datetime").dt.month().alias("pickup_month"),
+    # Trip duration in seconds
+    (pl.col("tpep_dropoff_datetime") - pl.col("tpep_pickup_datetime"))
+    .dt.total_seconds()
+    .alias("trip_duration_sec"),
+])
+
+df = df.with_columns(
+    (
+        pl.col("trip_distance") /
+        (pl.col("trip_duration_sec") / 3600) # seconds → hours
+    )
+    .fill_nan(None) # protect against div‑by‑zero
+    .alias("avg_speed_mph")
+)
+
+# Compute per‑pickup‑zone statistics once
+zone_stats = (
+    df.group_by("PULocationID")
+    .agg([
+        pl.mean("fare_amount").alias("zone_avg_fare"),
+        pl.mean("trip_distance").alias("zone_avg_dist"),
+        pl.count().alias("zone_trip_cnt"),
+    ])
+    .rename({"PULocationID": "pickup_zone_id"}) # avoid name clash later
+)
+
+# Join those stats back onto the original rows
+df = df.join(zone_stats, left_on="PULocationID", right_on="pickup_zone_id", how="left")
+```
+
+:::
+::::
+
+::::{exercise} More feature engineering!
+Similarly to the exercise above, define the following features in the data:
+
+- `pickup_hour` extracted from `tpep_pickup_datetime`
+- `is_weekend`, a Boolean value for each trip
+- `avg_speed_mph`, exactly as before
+- `tip_to_fare_ratio`, dividing the tip amount by the total fare. 
Be careful
+with division by 0
+- `fare_per_mile`, dividing the total fare by the distance
+- `dist_per_passenger`, the distance travelled per passenger on each trip
+(trip distance divided by the number of passengers)
+- `speed_per_pickup_area`, the average velocity stratified by pickup location
+- `dropoff_trip_count`, count of trips stratified per dropoff location
+
+:::{solution}
+
+```python
+import polars as pl
+raw_df = pl.read_parquet("yellow_tripdata_2025-01.parquet")
+df = raw_df.with_columns([
+    # 1. pickup_hour
+    pl.col("tpep_pickup_datetime").dt.hour().alias("pickup_hour"),
+
+    # 2. is_weekend (ISO weekday numbering: Sat=6, Sun=7)
+    pl.col("tpep_pickup_datetime")
+    .dt.weekday()
+    .is_in([6, 7])
+    .alias("is_weekend"),
+
+    # 3. trip_duration_sec
+    (pl.col("tpep_dropoff_datetime") - pl.col("tpep_pickup_datetime"))
+    .dt.total_seconds()
+    .alias("trip_duration_sec"),
+])
+
+# The remaining features reuse trip_duration_sec, so they need a second
+# with_columns call: expressions within a single call run in parallel and
+# cannot see each other's output columns.
+df = df.with_columns([
+    # 4. avg_speed_mph (null when the duration is 0)
+    pl.when(pl.col("trip_duration_sec") > 0)
+    .then(pl.col("trip_distance") / (pl.col("trip_duration_sec") / 3600))
+    .otherwise(None)
+    .alias("avg_speed_mph"),
+
+    # 5. tip_to_fare_ratio (null when the fare is 0)
+    pl.when(pl.col("fare_amount") != 0)
+    .then(pl.col("tip_amount") / pl.col("fare_amount"))
+    .otherwise(None)
+    .alias("tip_to_fare_ratio"),
+
+    # 6. fare_per_mile (null when the distance is 0)
+    pl.when(pl.col("trip_distance") != 0)
+    .then(pl.col("fare_amount") / pl.col("trip_distance"))
+    .otherwise(None)
+    .alias("fare_per_mile"),
+
+    # 7. 
dist_per_passenger (null when passenger_count is 0)
+    pl.when(pl.col("passenger_count") > 0)
+    .then(pl.col("trip_distance") / pl.col("passenger_count"))
+    .otherwise(None)
+    .alias("dist_per_passenger"),
+])
+
+# speed_per_pickup_area: average speed stratified by pickup location
+pickup_stats = (
+    df.group_by("PULocationID")
+    .agg(pl.mean("avg_speed_mph").alias("speed_per_pickup_area"))
+    .rename({"PULocationID": "pickup_zone_id"}) # avoid name clash later
+)
+df = df.join(pickup_stats, left_on="PULocationID", right_on="pickup_zone_id", how="left")
+
+dropoff_stats = (
+    df.group_by("DOLocationID")
+    .agg([
+        pl.mean("avg_speed_mph").alias("dropoff_avg_speed"),
+        pl.count().alias("dropoff_trip_count"),
+    ])
+    .rename({"DOLocationID": "dropoff_zone_id"}) # avoid name clash later
+)
+
+# Join the per‑zone stats back onto every row
+df = df.join(dropoff_stats, left_on="DOLocationID", right_on="dropoff_zone_id", how="left")
+
+```
+
+:::
+::::
+
+## Summary
+
+We have seen how to deal with common workflows in both Pandas and Polars,
+starting from basic tasks like opening a dataset and inspecting it to performing
+split-apply-combine pipelines. We have seen how to use Polars to manipulate
+datasets and perform some basic feature engineering.
+
+:::{keypoints}
+
+- Dataframes are combinations of series
+- Both Pandas and Polars can be used to manipulate them
+- The expression API in Polars allows you to perform advanced operations with a
+simple DSL.
+
+:::
+
+## See also
+
+There is a lot more to Polars than what we covered in this short introduction.
+For example, queries like the ones we introduced can be performed lazily, i.e.
+just declared and then run all together, giving the backend a chance to
+optimise them. This can dramatically improve performance in the case of complex
+queries. For this and a lot more, we refer you to the official
+[documentation](https://docs.pola.rs/).
diff --git a/content/taxi_zones_xy.csv b/content/taxi_zones_xy.csv new file mode 100644 index 0000000..5d66bfc --- /dev/null +++ b/content/taxi_zones_xy.csv @@ -0,0 +1,264 @@ +LocationID,X,Y,zone,borough +1,-74.1767857452143,40.6895156480431,Newark Airport,EWR +2,-73.8261257703202,40.6257242377511,Jamaica Bay,Queens +3,-73.8494789238597,40.8658875419774,Allerton/Pelham Gardens,Bronx +4,-73.9770229219339,40.7241521436714,Alphabet City,Manhattan +5,-74.1899296712375,40.550340123832,Arden Heights,Staten Island +6,-74.0677744607421,40.5990621740821,Arrochar/Fort Wadsworth,Staten Island +7,-73.9214905669465,40.761084729151,Astoria,Queens +8,-73.9232024092836,40.7786069617704,Astoria Park,Queens +9,-73.7880202487407,40.7544109271114,Auburndale,Queens +10,-73.7916654578906,40.6781247031195,Baisley Park,Queens +11,-74.0106156305362,40.6039777088098,Bath Beach,Brooklyn +12,-74.0154903292143,40.7024884135418,Battery Park,Manhattan +13,-74.0161196692833,40.7116120831165,Battery Park City,Manhattan +14,-74.0304470508297,40.6235842793089,Bay Ridge,Brooklyn +15,-73.7879710847436,40.7852195006457,Bay Terrace/Fort Totten,Queens +16,-73.7716678221165,40.7612088345005,Bayside,Queens +17,-73.9491813010382,40.6919940962748,Bedford,Brooklyn +18,-73.8869219948557,40.8687628819908,Bedford Park,Bronx +19,-73.7278693961567,40.7364724391387,Bellerose,Queens +20,-73.8860348405346,40.8577731142544,Belmont,Bronx +21,-73.9813007259727,40.6020489840097,Bensonhurst East,Brooklyn +22,-73.9942970150052,40.6096273400171,Bensonhurst West,Brooklyn +23,-74.1594432875648,40.607504219542,Bloomfield/Emerson Hill,Staten Island +24,-73.9655685290937,40.8020327729942,Bloomingdale,Manhattan +25,-73.9864589876713,40.685614589464,Boerum Hill,Brooklyn +26,-73.9895604065528,40.6286122405859,Borough Park,Brooklyn +27,-73.9097811978144,40.5589500919216,Breezy Point/Fort Tilden/Riis Beach,Queens +28,-73.80732908405,40.710852781755,Briarwood/Jamaica Hills,Queens +29,-73.9605798481175,40.582195546571,Brighton 
Beach,Brooklyn +30,-73.8200975464803,40.6048721837899,Broad Channel,Queens +31,-73.875722045295,40.85992052114,Bronx Park,Bronx +32,-73.8646241408313,40.8644517064252,Bronxdale,Bronx +33,-73.995328797204,40.6962383270628,Brooklyn Heights,Brooklyn +34,-73.9676989686791,40.7025590381029,Brooklyn Navy Yard,Brooklyn +35,-73.9124814882782,40.6638098226151,Brownsville,Brooklyn +36,-73.9165639802734,40.6990847380764,Bushwick North,Brooklyn +37,-73.9259483634522,40.6962674524983,Bushwick South,Brooklyn +38,-73.7355495585459,40.6932955704325,Cambria Heights,Queens +39,-73.899773871952,40.6388789299243,Canarsie,Brooklyn +40,-73.9958180819094,40.6785042230919,Carroll Gardens,Brooklyn +41,-73.9520653308923,40.8042048286288,Central Harlem,Manhattan +42,-73.9395164693167,40.8210462140976,Central Harlem North,Manhattan +43,-73.9655721799594,40.7824597386361,Central Park,Manhattan +44,-74.2295465457844,40.527298175003,Charleston/Tottenville,Staten Island +45,-73.9982526213797,40.7130578275841,Chinatown,Manhattan +46,-73.7864863118305,40.8474998984855,City Island,Bronx +47,-73.8969291492323,40.8457549453718,Claremont/Bathgate,Bronx +48,-73.9898566508315,40.7622367556144,Clinton East,Manhattan +49,-73.9649337316477,40.6885158752504,Clinton Hill,Brooklyn +50,-73.9938994033218,40.7666923746668,Clinton West,Manhattan +51,-73.8304236318969,40.874061550909,Co-Op City,Bronx +52,-73.9969224713871,40.6866045417646,Cobble Hill,Brooklyn +53,-73.8440703458857,40.7819879584294,College Point,Queens +54,-74.003092790563,40.6869911475205,Columbia Street,Brooklyn +55,-73.9904741395563,40.5768996198541,Coney Island,Brooklyn +56,-73.8590533534623,40.7415986155017,Corona,Queens +57,-73.853384474855,40.7523160392058,Corona,Queens +58,-73.8207053822972,40.8414754987567,Country Club,Bronx +59,-73.8930744338338,40.8388599905968,Crotona Park,Bronx +60,-73.8897822026553,40.8312941187883,Crotona Park East,Bronx +61,-73.9412820292214,40.6738428461316,Crown Heights North,Brooklyn 
+62,-73.9492699670747,40.6670895607261,Crown Heights South,Brooklyn +63,-73.8776764323789,40.6848101947076,Cypress Hills,Brooklyn +64,-73.7313921625056,40.760631276033,Douglaston,Queens +65,-73.9855710635353,40.6953726093294,Downtown Brooklyn/MetroTech,Brooklyn +66,-73.9863827475546,40.701732471687,DUMBO/Vinegar Hill,Brooklyn +67,-74.0147358711929,40.6184544072203,Dyker Heights,Brooklyn +68,-73.9999401565074,40.7483972248318,East Chelsea,Manhattan +69,-73.9153662202336,40.8306076776139,East Concourse/Concourse Village,Bronx +70,-73.8684029143644,40.7639478203254,East Elmhurst,Queens +71,-73.9377345201578,40.6439285192804,East Flatbush/Farragut,Brooklyn +72,-73.9202100548833,40.6524606311874,East Flatbush/Remsen Village,Brooklyn +73,-73.8065843331469,40.7536974869459,East Flushing,Queens +74,-73.9383104968462,40.8055655654775,East Harlem North,Manhattan +75,-73.9449566676846,40.7906501597646,East Harlem South,Manhattan +76,-73.8784173669943,40.6586863044662,East New York,Brooklyn +77,-73.8957171569753,40.6677018744258,East New York/Pennsylvania Avenue,Brooklyn +78,-73.8866455940689,40.8460302771125,East Tremont,Bronx +79,-73.9852141243977,40.7279442789046,East Village,Manhattan +80,-73.9423293071901,40.7144695745871,East Williamsburg,Brooklyn +81,-73.8455325663154,40.8773588281637,Eastchester,Bronx +82,-73.8723440095934,40.7384639203673,Elmhurst,Queens +83,-73.889221829475,40.7401456031773,Elmhurst/Maspeth,Queens +84,-74.1739373269384,40.5320172010873,Eltingville/Annadale/Prince's Bay,Staten Island +85,-73.9522088368464,40.6472549665241,Erasmus,Brooklyn +86,-73.7541867292922,40.6025539265615,Far Rockaway,Queens +87,-74.0078121163865,40.706659617356,Financial District North,Manhattan +88,-74.0113077706368,40.7033938017209,Financial District South,Manhattan +89,-73.9626937337469,40.64098273381,Flatbush/Ditmas Park,Brooklyn +90,-73.9967775123964,40.7425461606612,Flatiron,Manhattan +91,-73.932543590688,40.6273899924426,Flatlands,Brooklyn 
+92,-73.8304471275476,40.7641272834135,Flushing,Queens +93,-73.8418927550086,40.7392346574674,Flushing Meadows-Corona Park,Queens +94,-73.900591010865,40.8582607643596,Fordham South,Bronx +95,-73.8482199362705,40.7234652723597,Forest Hills,Queens +96,-73.8760122181239,40.6957609903045,Forest Park/Highland Park,Queens +97,-73.975576580035,40.6906156556941,Fort Greene,Brooklyn +98,-73.7795502346253,40.7338412186621,Fresh Meadows,Queens +99,-74.187702737221,40.5796179453647,Freshkills Park,Staten Island +100,-73.9887858928492,40.7535140917746,Garment District,Manhattan +101,-73.7090705910379,40.7437217835813,Glen Oaks,Queens +102,-73.8819987296701,40.7026793599143,Glendale,Queens +103,-74.0451814058337,40.6898605712245,Governor's Island/Ellis Island/Liberty Island,Manhattan +103,-74.0391308720565,40.6986732840584,Governor's Island/Ellis Island/Liberty Island,Manhattan +103,-74.0187943689955,40.6881378249245,Governor's Island/Ellis Island/Liberty Island,Manhattan +106,-73.9917635756064,40.6733611676171,Gowanus,Brooklyn +107,-73.9833103532463,40.7373483763208,Gramercy,Manhattan +108,-73.9803761510937,40.5891103441533,Gravesend,Brooklyn +109,-74.1527146857649,40.5488307159092,Great Kills,Staten Island +110,-74.1258464065771,40.5432675249983,Great Kills Park,Staten Island +111,-73.990988902762,40.6521158025647,Green-Wood Cemetery,Brooklyn +112,-73.9484721972696,40.7288303471399,Greenpoint,Brooklyn +113,-73.9946282171513,40.7324859518256,Greenwich Village North,Manhattan +114,-73.9986779466048,40.7286117315904,Greenwich Village South,Manhattan +115,-74.0924861023209,40.6201275450588,Grymes Hill/Clifton,Staten Island +116,-73.9473675432227,40.827535259806,Hamilton Heights,Manhattan +117,-73.7760841015511,40.5960558298222,Hammels/Arverne,Queens +118,-74.1370701313217,40.5856310621159,Heartland Village/Todt Hill,Staten Island +119,-73.9269662109924,40.8367300313346,Highbridge,Bronx +120,-73.9308068517413,40.8460040720504,Highbridge Park,Manhattan 
+121,-73.7996883075716,40.7273471948519,Hillcrest/Pomonok,Queens +122,-73.7615641851515,40.7110684671189,Hollis,Queens +123,-73.9651983047602,40.6002048027388,Homecrest,Brooklyn +124,-73.8500050428708,40.6595667686077,Howard Beach,Queens +125,-74.0071756920052,40.7253763405368,Hudson Sq,Manhattan +126,-73.8849772729363,40.813918192187,Hunts Point,Bronx +127,-73.9203254718565,40.8650626832916,Inwood,Manhattan +128,-73.9254320855109,40.8721876093241,Inwood Hill Park,Manhattan +129,-73.8874063794919,40.7590574739636,Jackson Heights,Queens +130,-73.8003475246798,40.7032738833012,Jamaica,Queens +131,-73.7713744950727,40.720412328134,Jamaica Estates,Queens +132,-73.778263658902,40.6426045217178,JFK Airport,Queens +133,-73.9743361977043,40.6393872038878,Kensington,Brooklyn +134,-73.8299927030517,40.7087501798075,Kew Gardens,Queens +135,-73.8239855138759,40.7294099643617,Kew Gardens Hills,Queens +136,-73.9057199662352,40.8648027310875,Kingsbridge Heights,Bronx +137,-73.9771193707357,40.7403592357115,Kips Bay,Manhattan +138,-73.8728037146871,40.7748673895178,LaGuardia Airport,Queens +139,-73.7433230842567,40.6777493674309,Laurelton,Queens +140,-73.9545680421348,40.7655068182316,Lenox Hill East,Manhattan +141,-73.9597126787805,40.7668388165846,Lenox Hill West,Manhattan +142,-73.9813524137399,40.7739059853886,Lincoln Square East,Manhattan +143,-73.9879729664182,40.7757702095713,Lincoln Square West,Manhattan +144,-73.9974066841975,40.7205814515076,Little Italy/NoLiTa,Manhattan +145,-73.9486991918172,40.7465899679036,Long Island City/Hunters Point,Queens +146,-73.9336340945451,40.7545288941149,Long Island City/Queens Plaza,Queens +147,-73.8981929347315,40.8191985303455,Longwood,Bronx +148,-73.9907183626618,40.7192116742044,Lower East Side,Manhattan +149,-73.9484742088121,40.6065579859268,Madison,Brooklyn +150,-73.9428842620734,40.5802623241253,Manhattan Beach,Brooklyn +151,-73.9678083909732,40.797866268004,Manhattan Valley,Manhattan 
+152,-73.9543248746271,40.8175772001908,Manhattanville,Manhattan +153,-73.9110625766578,40.8756004059432,Marble Hill,Manhattan +154,-73.8961233069501,40.593118666611,Marine Park/Floyd Bennett Field,Brooklyn +155,-73.9067693423191,40.6177885388735,Marine Park/Mill Basin,Brooklyn +156,-74.1648593047613,40.6287483754853,Mariners Harbor,Staten Island +157,-73.9019256908442,40.7240393566008,Maspeth,Queens +158,-74.0083857570117,40.735248066679,Meatpacking/West Village West,Manhattan +159,-73.9135829897644,40.8182594605072,Melrose South,Bronx +160,-73.8807122478637,40.7185038197548,Middle Village,Queens +161,-73.9774318381095,40.7582264812217,Midtown Center,Manhattan +162,-73.9721454850812,40.7568161553126,Midtown East,Manhattan +163,-73.9783669880204,40.7644254534051,Midtown North,Manhattan +164,-73.9859288064872,40.7488076668144,Midtown South,Manhattan +165,-73.9546029482619,40.6209587000862,Midwood,Brooklyn +166,-73.9618152643826,40.8095702282723,Morningside Heights,Manhattan +167,-73.9044425941057,40.8279880356749,Morrisania/Melrose,Bronx +168,-73.9170580317151,40.8074395308644,Mott Haven/Port Morris,Bronx +169,-73.9050214600049,40.8491148219147,Mount Hope,Bronx +170,-73.9769419924356,40.7476542811031,Murray Hill,Manhattan +171,-73.8088807171629,40.7689436687832,Murray Hill-Queens,Queens +172,-74.1039265024817,40.5724654201423,New Dorp/Midland Beach,Staten Island +173,-73.8630837007824,40.7517792439212,North Corona,Queens +174,-73.8777605618466,40.8768525921671,Norwood,Bronx +175,-73.7573567716067,40.743273849336,Oakland Gardens,Queens +176,-74.1196124324174,40.5620606070131,Oakwood,Staten Island +177,-73.9111057913062,40.6770986129152,Ocean Hill,Brooklyn +178,-73.9706887790417,40.6176331814343,Ocean Parkway South,Brooklyn +179,-73.9268123582238,40.7714253418156,Old Astoria,Queens +180,-73.8493643543423,40.6751638515945,Ozone Park,Queens +181,-73.979044896845,40.672019140546,Park Slope,Brooklyn +182,-73.8579416206911,40.8374043586994,Parkchester,Bronx 
+183,-73.8318540529646,40.8495686468422,Pelham Bay,Bronx +184,-73.8046108382118,40.8647854171785,Pelham Bay Park,Bronx +185,-73.8551804331804,40.8534716586784,Pelham Parkway,Bronx +186,-73.9924553277106,40.7484763617618,Penn Station/Madison Sq West,Manhattan +187,-74.1411524820629,40.6254537199379,Port Richmond,Staten Island +188,-73.94520015759,40.6575600630782,Prospect-Lefferts Gardens,Brooklyn +189,-73.968269632125,40.6771824866237,Prospect Heights,Brooklyn +190,-73.9709500738325,40.6606075676682,Prospect Park,Brooklyn +191,-73.7411204207497,40.7142783033807,Queens Village,Queens +192,-73.8151925356493,40.7444109985,Queensboro Hill,Queens +193,-73.9402863033051,40.7617244307638,Queensbridge/Ravenswood,Queens +194,-73.9210290324994,40.7914329582306,Randalls Island,Manhattan +195,-74.009549313284,40.6754616838619,Red Hook,Brooklyn +196,-73.8640102120344,40.7236478589889,Rego Park,Queens +197,-73.8290435569064,40.6930004742218,Richmond Hill,Queens +198,-73.9019937512221,40.7045215336003,Ridgewood,Queens +199,-73.8835364013293,40.7920459432226,Rikers Island,Bronx +200,-73.9064606078974,40.8998596211143,Riverdale/North Riverdale/Fieldston,Bronx +201,-73.8472464785559,40.577468718337,Rockaway Park,Queens +202,-73.9504104879777,40.7611679139077,Roosevelt Island,Manhattan +203,-73.7367188089441,40.6595017770463,Rosedale,Queens +204,-74.2069729508607,40.5407426890056,Rossville/Woodrow,Staten Island +205,-73.7626156564313,40.6922189718589,Saint Albans,Queens +206,-74.1231637564731,40.6359593979133,Saint George/New Brighton,Staten Island +207,-73.8993250159726,40.7635113549352,Saint Michaels Cemetery/Woodside,Queens +208,-73.824886342133,40.8246861384134,Schuylerville/Edgewater Park,Bronx +209,-74.0023597244525,40.7084896914032,Seaport,Manhattan +210,-73.9443360454416,40.5937616085004,Sheepshead Bay,Brooklyn +211,-74.0013747299582,40.7238990714701,SoHo,Manhattan +212,-73.8699019343016,40.8282769856365,Soundview/Bruckner,Bronx 
+213,-73.8607910370492,40.816490762555,Soundview/Castle Hill,Bronx +214,-74.0858854521269,40.5866190655298,South Beach/Dongan Hills,Staten Island +215,-73.7903675378715,40.6941903013512,South Jamaica,Queens +216,-73.8205132705286,40.6770712325623,South Ozone Park,Queens +217,-73.9568457688564,40.703249610232,South Williamsburg,Brooklyn +218,-73.7721162803473,40.6736200279408,Springfield Gardens North,Queens +219,-73.7610079604205,40.6602284956109,Springfield Gardens South,Queens +220,-73.9118772631728,40.8819009385792,Spuyten Duyvil/Kingsbridge,Bronx +221,-74.0813347613662,40.6202239775881,Stapleton,Staten Island +222,-73.8821375169092,40.6468719097136,Starrett City,Brooklyn +223,-73.906671830047,40.7781933282005,Steinway,Queens +224,-73.9778420751297,40.7317281591558,Stuy Town/Peter Cooper Village,Manhattan +225,-73.9314448522357,40.6887850672001,Stuyvesant Heights,Brooklyn +226,-73.9293212827563,40.7352734102809,Sunnyside,Queens +227,-74.0067945517929,40.6415976370168,Sunset Park East,Brooklyn +228,-74.009960471569,40.6536182069019,Sunset Park West,Brooklyn +229,-73.9651741989212,40.7565892984893,Sutton Place/Turtle Bay North,Manhattan +230,-73.9841761608222,40.7598447400293,Times Sq/Theatre District,Manhattan +231,-74.0067116243899,40.7186956410077,TriBeCa/Civic Center,Manhattan +232,-73.9823067049464,40.7153860841071,Two Bridges/Seward Park,Manhattan +233,-73.9712559771296,40.7491720982236,UN/Turtle Bay South,Manhattan +234,-73.9904776189691,40.7403134645878,Union Sq,Manhattan +235,-73.9149683294755,40.8535029301211,University Heights/Morris Heights,Bronx +236,-73.9569723853064,40.7804914757347,Upper East Side North,Manhattan +237,-73.9656914919459,40.7685418846646,Upper East Side South,Manhattan +238,-73.9728145174503,40.7917662467824,Upper West Side North,Manhattan +239,-73.9782733418486,40.7841073645538,Upper West Side South,Manhattan +240,-73.8790671945698,40.8947464191253,Van Cortlandt Park,Bronx +241,-73.8964501437395,40.8759758476571,Van Cortlandt 
Village,Bronx +242,-73.8398789614734,40.8495965844009,Van Nest/Morris Park,Bronx +243,-73.9328243182356,40.8586702866313,Washington Heights North,Manhattan +244,-73.9416668140575,40.8412186698664,Washington Heights South,Manhattan +245,-74.1031706067521,40.628478475102,West Brighton,Staten Island +246,-74.004512572075,40.7524372519647,West Chelsea/Hudson Yards,Manhattan +247,-73.9250591826957,40.8292114745072,West Concourse,Bronx +248,-73.8710104534108,40.8346771999308,West Farms/Bronx River,Bronx +249,-74.0024969086272,40.7346115559814,West Village,Manhattan +250,-73.8494786753898,40.8324906886295,Westchester Village/Unionport,Bronx +251,-74.1232368409925,40.6196055161537,Westerleigh,Staten Island +252,-73.8153942986072,40.788360648021,Whitestone,Queens +253,-73.8414745485909,40.7600863368444,Willets Point,Queens +254,-73.8582696518177,40.8832233292992,Williamsbridge/Olinville,Bronx +255,-73.9571337756889,40.7188341922346,Williamsburg (North Side),Brooklyn +256,-73.9591078838642,40.7109771296904,Williamsburg (South Side),Brooklyn +257,-73.9772393111852,40.6536644952118,Windsor Terrace,Brooklyn +258,-73.8566390530717,40.6901263678129,Woodhaven,Queens +259,-73.8563511172889,40.8991027731978,Woodlawn/Wakefield,Bronx +260,-73.9037132789432,40.7467977944692,Woodside,Queens +261,-74.0129193755126,40.708975618892,World Trade Center,Manhattan +262,-73.9458298180079,40.7765342289951,Yorkville East,Manhattan +263,-73.9512079916544,40.7784958687768,Yorkville West,Manhattan \ No newline at end of file diff --git a/content/visualisation.md b/content/visualisation.md new file mode 100644 index 0000000..74a0103 --- /dev/null +++ b/content/visualisation.md @@ -0,0 +1,93 @@ +# Visualisations and dashboards + +:::{questions} +- What syntax is used to make a lesson? +- How do you structure a lesson effectively for teaching? +- `questions` are at the top of a lesson and provide a starting + point for what you might learn. It is usually a bulleted list. 
+::: + +:::{objectives} +- Show a complete lesson page with all of the most common + structures. +- ... + +This is also a holdover from the carpentries-style. It could +usually be left off. +::: + + + + +The introduction should be a high level overview of what is on the +page and why it is interesting. + + +The lines below (only in the source) will set the default highlighting +language for the entire page. + +:::{highlight} python +::: + + + +## Section + +A section. + +:::{discussion} +Discuss the following. + +- A discussion section +- Another discussion topic +::: + + + +## Section + +``` +print("hello world") +# This uses the default highlighting language +``` + +```python +print("hello world) +``` + + + +## Exercises: description + +:::{exercise} Exercise Topic-1: imperative description of exercise +Exercise text here. +::: + +:::{solution} +Solution text here +::: + + + +## Summary + +A Summary of what you learned and why it might be useful. Maybe a +hint of what comes next. + + + +## See also + +- Other relevant links +- Other link + + + +:::{keypoints} +- What the learner should take away +- point 2 +- ... + +This is another holdover from the carpentries style. This perhaps +is better done in a "summary" section. +::: diff --git a/content/yellow_tripdata_2025-01.parquet b/content/yellow_tripdata_2025-01.parquet new file mode 100644 index 0000000..f89625d Binary files /dev/null and b/content/yellow_tripdata_2025-01.parquet differ