Sebastijan-Dominis · Sebastijan-Dominis · Mar 28, 2026 · Mar 25, 2026 · Mar 26, 2026 · Mar 26, 2026
diff --git a/.gitignore b/.gitignore
@@ -50,20 +50,20 @@ coverage.json
 *.py.cover
 .hypothesis/
 .pytest_cache/
-cover/
+/cover/
 
 # Translations
 *.mo
 *.pot
 
 # Django stuff:
-*.log
+# *.log
 local_settings.py
 db.sqlite3
 db.sqlite3-journal
 
 # Flask stuff:
-instance/
+/instance/
 .webassets-cache
 
 # Scrapy stuff:
@@ -74,7 +74,7 @@ docs/_build/
 
 # PyBuilder
 .pybuilder/
-target/
+/target/
 
 # Jupyter Notebook
 .ipynb_checkpoints
@@ -139,9 +139,9 @@ celerybeat.pid
 .env
 .envrc
 .venv
-env/
+/env/
 venv/
-ENV/
+/ENV/
 env.bak/
 venv.bak/
 
@@ -212,16 +212,22 @@ __marimo__/
 .history/
 
 # CatBoost model snapshot files
-catboost_info/
+/catboost_info/
 
 # VSCodeCounter cache
 .vscodecounter/
 
 # Failure management folder that helps deal with failed search and train runs by storing relevant information at runtime (best params from each search, training model snapshots) and deleting if the run is successful. Irrelevant for version control.
 /failure_management/
 
-# Temporarily ignore all experiments, data, feature sets, and model registry files. These are all relevant for version control, but we want to temporarily ignore them while we work on the registry and experiment tracking features.
+# Ignore all experiments, data, feature sets, model registry & archive, predictions, and monitoring files, as well as log files. 
+# These are all potentially relevant for version control, but we want to temporarily ignore them while we work 
+# on the registry and experiment tracking features.
 /experiments/
 /data/
 /feature_store/
-/model_registry/
+/model_registry/
+/predictions/
+/monitoring/
+/orchestration_logs/
+/scripts_logs/
diff --git a/Dockerfile b/Dockerfile
@@ -5,7 +5,7 @@
 # For faster development iterations, consider using a local Conda environment on your machine with the 
 # same environment.yml file.
 
-# ===== Base image with GPU support =====
+# Base image with GPU support
 FROM pytorch/pytorch:2.10.0-cuda12.8-cudnn9-runtime
 # NOTE: Uncomment the line below and comment out the line above only if you want to generate fake data.
 # For regular use, stick to the current image. If you use the image below, make sure to also uncomment
@@ -14,14 +14,14 @@ FROM pytorch/pytorch:2.10.0-cuda12.8-cudnn9-runtime
 
 # FROM nvidia/cuda:12.8.0-runtime-ubuntu22.04
 
-# ===== Set working directory =====
+# Set working directory
 WORKDIR /app
 
-# ===== Timezone fix =====
+# Timezone fix
 ENV TZ=Europe/Zagreb
 RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
 
-# ===== Install Miniconda and Git =====
+# Install Miniconda and Git
 RUN apt-get update && apt-get install -y wget bzip2 git && \
     wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh && \
     bash /tmp/miniconda.sh -b -p /opt/conda && \
@@ -31,14 +31,14 @@ RUN apt-get update && apt-get install -y wget bzip2 git && \
 # Add conda to PATH
 ENV PATH="/opt/conda/bin:$PATH"
 
-# ===== Accept Conda Terms of Service =====
+# Accept Conda Terms of Service
 RUN conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/main && \
     conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/r
 
-# ===== Copy environment.yml first for faster rebuilds =====
+# Copy environment.yml first for faster rebuilds
 COPY environment.yml /tmp/environment.yml
 
-# ===== Create Conda environment =====
+# Create Conda environment
 # Create env
 RUN conda env create -f /tmp/environment.yml -n hotel_management
 
@@ -61,10 +61,10 @@ COPY pyproject.toml .
 # Install rest
 RUN conda run -n hotel_management pip install -r /tmp/requirements.txt
 
-# ===== Use the environment for all container commands =====
+# Use the environment for all container commands
 SHELL ["conda", "run", "-n", "hotel_management", "/bin/bash", "-c"]
 
-# ===== Copy the code =====
+# Copy the code
 COPY ml ./ml
 COPY pipelines ./pipelines
 COPY scripts ./scripts
@@ -73,11 +73,11 @@ COPY ml_service ./ml_service
 # Install the package
 RUN conda run -n hotel_management pip install -e .
 
-# ===== Expose ports =====
+# Expose ports
 EXPOSE 8000
 EXPOSE 8050
 
-# ===== Run all services =====
+# Run all services
 CMD bash -c "\
     uvicorn ml_service.backend.main:app --reload --host 0.0.0.0 --port 8000 & \
     python -m ml_service.frontend.app --host 0.0.0.0 --port 8050 & \

diff --git a/README.md b/README.md
@@ -9,11 +9,12 @@
     - located in `data/raw/hotel_bookings/v1/2026-02-25T22-43-23_732dfdb7/data.csv`
     - originally from https://www.kaggle.com/datasets/mojtaba142/hotel-booking 
 - Current architecture expanded to support many datasets.
-- The ml workflow covers everything from the registration of a raw data snapshot to model promotion.
+- The ml workflow covers everything from the registration of a raw data snapshot to model monitoring.
 > Note: the repo was previously named `hotel_management`, so you will still see that name in places; it was renamed to make the project's purpose clearer.
 
 ## Features
 
+Pipelines for every part of the ml workflow:
 - Data preprocessing
   - Register raw data snapshots
   - Build interim and processed datasets
@@ -25,6 +26,25 @@
 - Model promotion
   - Includes model registry for staging and production
   - Archives past production models
+- Model inference
+- Model monitoring
+
+Maximum **decoupling** of datasets, feature sets, and modeling:
+- Datasets merge at runtime, using predefined configs and DAG for ordering
+- Feature sets merge at runtime using a predefined entity key
+- Models can use any snapshots of datasets and feature sets via snapshot bindings registry
+- Validation ensures consistency and predefined minimum row presence
+
+Full **reproducibility**:
+- Hashing and downstream validation of relevant `artifacts` and `configs`
+- Runtime info validation (hardware, git commit, environment...)
+
+Code **quality** ensured by CI, which includes:
+- `ruff` checks
+- `mypy` checks (moderate strictness)
+- import layer checks
+- naming conventions checks
+- **1235 tests** (CI fails if coverage drops below 90%)
 
 ## Installation
 
@@ -44,21 +64,33 @@ Two options:
 
 See the [usage guide](docs/usage.md) for instructions on running the workflow.
 
-### Usage examples (via `ml_service` and `Docker`):
+### Usage examples (via `ml_service`):
 
-#### Pipelines
+#### Modeling Configs Writing, Validation, Saving, and Viewing
 
-!["Gif portrayal of pipelines app from ml_service"](assets/gifs/ml_service_pipelines_v2.gif)
+!["Gif portrayal of writing modeling configs with ml_service"](assets/gifs/ml_service_modeling_configs_v3.gif)
 
-#### Modeling Configs
+**Similar functionality exists for other supported configs**
 
-!["Gif portrayal of modeling configs app from ml_service"](assets/gifs/ml_service_modeling_configs_v2.gif)
+#### Pipeline Running and Artifact Viewing
+
+!["Gif portrayal of running a pipeline with ml_service"](assets/gifs/ml_service_pipelines_v3.gif)
+
+**Similar functionality exists for scripts**
+
+#### Documentation Reading in Browser
+
+!["Gif portrayal of reading the docs with ml_service"](assets/gifs/ml_service_docs_v1.gif)
+
+#### Directory Structure Viewing in Browser
+
+!["Gif portrayal of viewing directory structure with ml_service"](assets/gifs/ml_service_dir_viewer_v1.gif)
 
 ## Architecture
 
 ### Artifact Lineage (high-level overview)
 
-![Artifact Lineage Diagram](docs/architecture/img/artifact_lineage_v2.png)
+![Artifact Lineage Diagram](docs/architecture/img/artifact_lineage_v3.png)
 
 ### Details
 

diff --git a/assets/gifs/ml_service_dir_viewer_v1.gif b/assets/gifs/ml_service_dir_viewer_v1.gif
diff --git a/assets/gifs/ml_service_docs_v1.gif b/assets/gifs/ml_service_docs_v1.gif
diff --git a/assets/gifs/ml_service_modeling_configs_v2.gif b/assets/gifs/ml_service_modeling_configs_v2.gif
diff --git a/assets/gifs/ml_service_modeling_configs_v3.gif b/assets/gifs/ml_service_modeling_configs_v3.gif
diff --git a/assets/gifs/ml_service_pipelines_v2.gif b/assets/gifs/ml_service_pipelines_v2.gif
diff --git a/assets/gifs/ml_service_pipelines_v3.gif b/assets/gifs/ml_service_pipelines_v3.gif
diff --git a/assets/img/docs/architecture/artifact_lineage_v2.png b/assets/img/docs/architecture/artifact_lineage_v2.png
diff --git a/assets/img/docs/architecture/artifact_lineage_v3.png b/assets/img/docs/architecture/artifact_lineage_v3.png
diff --git a/assets/img/docs/architecture/freeze_v2.png b/assets/img/docs/architecture/freeze_v2.png
diff --git a/assets/img/docs/architecture/freeze_v3.png b/assets/img/docs/architecture/freeze_v3.png
diff --git a/assets/img/docs/architecture/infer_v1.png b/assets/img/docs/architecture/infer_v1.png
diff --git a/assets/img/docs/architecture/monitor_v1.png b/assets/img/docs/architecture/monitor_v1.png
diff --git a/assets/img/docs/architecture/search_v2.png b/assets/img/docs/architecture/search_v2.png
diff --git a/assets/img/docs/architecture/search_v3.png b/assets/img/docs/architecture/search_v3.png
diff --git a/assets/img/docs/architecture/train_v2.png b/assets/img/docs/architecture/train_v2.png
diff --git a/assets/img/docs/architecture/train_v3.png b/assets/img/docs/architecture/train_v3.png
diff --git a/configs/data/interim/hotel_bookings/v1.yaml b/configs/data/interim/hotel_bookings/v1.yaml
@@ -153,7 +153,7 @@ drop_duplicates: true
 
 drop_missing_ints: true
 
-min_rows: 100000
+min_rows: 5000
 
 lineage:
   created_by: Sebastijan

diff --git a/configs/env/dev.yaml b/configs/env/dev.yaml
@@ -0,0 +1,20 @@
+cv: 2
+
+training:
+  iterations: 1
+  hardware:
+    task_type: "cpu"
+  snapshot_interval_seconds: 300
+
+search:
+  broad:
+    iterations: 1
+    n_iter: 1
+  narrow:
+    iterations: 1
+    n_iter: 1
+  hardware:
+    task_type: "cpu"
+  error_score: "raise"
+
+verbose: 100
diff --git a/configs/env/prod.yaml b/configs/env/prod.yaml
@@ -0,0 +1,4 @@
+training:
+  snapshot_interval_seconds: 60
+
+verbose: 10
diff --git a/configs/env/test.yaml b/configs/env/test.yaml
@@ -0,0 +1,21 @@
+cv: 2
+
+training:
+  iterations: 10
+  hardware:
+    task_type: "cpu"
+  snapshot_interval_seconds: 300
+
+search:
+  broad:
+    iterations: 10
+    n_iter: 2
+  narrow:
+    iterations: 10
+    n_iter: 2
+  hardware:
+    task_type: "gpu"
+    devices: [0]
+  error_score: "raise"
+
+verbose: 100