From abc86d9b3707b15fe22d9c9c9c380c3364be3d07 Mon Sep 17 00:00:00 2001 From: Nicolas Merinian Date: Thu, 19 Feb 2026 21:33:27 +0100 Subject: [PATCH 1/5] tech/dbt-add-marts: created marts because marts should be exposed instead of intermediate tables --- dbt_project.yml | 3 +- models/marts/_marts_models.yml | 30 +++++++++++++++++++ .../mart_mensq_pluviometrie_sup_100mm.sql | 4 +++ .../mart_mensq_temperatures_sup_20deg.sql | 4 +++ 4 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 models/marts/_marts_models.yml create mode 100644 models/marts/mart_mensq_pluviometrie_sup_100mm.sql create mode 100644 models/marts/mart_mensq_temperatures_sup_20deg.sql diff --git a/dbt_project.yml b/dbt_project.yml index 0a6c07e..8533ff9 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -1,5 +1,5 @@ name: 'projet_cc' -version: '1.0.0' +version: '1.0.1' config-version: 2 profile: 'projet_cc' @@ -13,3 +13,4 @@ clean-targets: ["target", "dbt_packages"] models: projet_cc: +materialized: view + marts: \ No newline at end of file diff --git a/models/marts/_marts_models.yml b/models/marts/_marts_models.yml new file mode 100644 index 0000000..b763723 --- /dev/null +++ b/models/marts/_marts_models.yml @@ -0,0 +1,30 @@ + +version: 2 + +models: + - name: mart_mensq_temperatures_sup_20deg + description: > + Table métier utilisée par le dashboard Streamlit. + Contient les mois avec température moyenne supérieure à 20°C. + + columns: + - name: annee + tests: + - not_null + + - name: mois + tests: + - not_null + - name: mart_mensq_pluviometrie_sup_100mm + description: > + Table métier utilisée par le dashboard Streamlit. + Contient les mois avec pluviométrie supérieure à 100mm. 
+ + columns: + - name: annee + tests: + - not_null + + - name: mois + tests: + - not_null \ No newline at end of file diff --git a/models/marts/mart_mensq_pluviometrie_sup_100mm.sql b/models/marts/mart_mensq_pluviometrie_sup_100mm.sql new file mode 100644 index 0000000..7b37acf --- /dev/null +++ b/models/marts/mart_mensq_pluviometrie_sup_100mm.sql @@ -0,0 +1,4 @@ +-- + +select * +from {{ ref('int_mensq_pluviometrie_sup_100mm') }} \ No newline at end of file diff --git a/models/marts/mart_mensq_temperatures_sup_20deg.sql b/models/marts/mart_mensq_temperatures_sup_20deg.sql new file mode 100644 index 0000000..a17cb15 --- /dev/null +++ b/models/marts/mart_mensq_temperatures_sup_20deg.sql @@ -0,0 +1,4 @@ +-- + +select * +from {{ ref('int_mensq_temperatures_sup_20deg') }} From 3f15816069c4f3b3b47b58dac524f6aae5316c82 Mon Sep 17 00:00:00 2001 From: Nicolas Merinian Date: Thu, 19 Feb 2026 22:43:49 +0100 Subject: [PATCH 2/5] tech/dbt-add-marts: deleted unused queries --- src/data_layer/bigquery.py | 45 -------------------------------------- 1 file changed, 45 deletions(-) diff --git a/src/data_layer/bigquery.py b/src/data_layer/bigquery.py index 9438e0b..eb58fdb 100644 --- a/src/data_layer/bigquery.py +++ b/src/data_layer/bigquery.py @@ -20,51 +20,6 @@ def get_bq_client(): def run_query(sql: str): client = get_bq_client() return client.query(sql).to_dataframe() - -# Exemple de fonction qui fait un truc -def get_todo1(): - return run_query(f""" - WITH CTE AS ( - SELECT - ANNEE AS annee, - MOIS AS mois, - -- On garde AAAAMM pour le tri ou l'affichage chronologique - AAAAMM AS date_key, - - -- Somme des précipitations pour la période donnée - AVG(RR) AS Cumul_Mensuel_Pluie_Total, - - -- 'NBRR', c'est le nombre de jours de pluie - - -- RRAB précipitation maximale tombée en 24 heures au cours du mois (Average) - AVG(RRAB) AS Cumul_MAxi_par_mois, - - -- Nombre de jours > 100mm - AVG(NBJRR100) AS Nb_Jours_Sup_100mm - - FROM `cc-reunion.data_meteofrance.stg_mensq_pluviometrie` - 
-- Indispensable pour fusionner les données de toutes les stations par mois - GROUP BY - ANNEE, - MOIS, - AAAAMM - - -- Tri par ordre chronologique (ce qui réglera aussi ton problème de tri dans le graph) - ORDER BY - AAAAMM ASC - ) - - SELECT * - FROM CTE - WHERE Nb_Jours_Sup_100mm > 1 - """) - -def get_data(): - return run_query(""" - SELECT ANNEE, moy_nuits_ge_20 - FROM `cc-reunion.data_meteofrance.int_mensq_temperatures_sup_20deg` - ORDER BY ANNEE ASC - """) def get_nb_moy_nuits_sup_20deg(): return run_query(""" From 29e4123281cc75d892348be0ba8140f6b4927c6d Mon Sep 17 00:00:00 2001 From: Nicolas Merinian Date: Thu, 19 Feb 2026 22:45:06 +0100 Subject: [PATCH 3/5] tech/dbt-add-marts: replace tables in queries by mart tables --- src/data_layer/bigquery.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data_layer/bigquery.py b/src/data_layer/bigquery.py index eb58fdb..fcf7055 100644 --- a/src/data_layer/bigquery.py +++ b/src/data_layer/bigquery.py @@ -24,7 +24,7 @@ def run_query(sql: str): def get_nb_moy_nuits_sup_20deg(): return run_query(""" SELECT ANNEE, AVG(moy_nuits_ge_20) as nb_moy_nuits_sup_20deg - FROM `cc-reunion.data_meteofrance.int_mensq_temperatures_sup_20deg` + FROM `cc-reunion.data_meteofrance.mart_mensq_temperatures_sup_20deg` GROUP BY ANNEE ORDER BY ANNEE ASC """) From 413fd3e51fce98e242e68e6f2b8a538c0811d53f Mon Sep 17 00:00:00 2001 From: Nicolas Merinian Date: Fri, 20 Feb 2026 00:09:21 +0100 Subject: [PATCH 4/5] tech/dbt-add-marts: created a file to track the tables that don't respect dbt process and that should be brought into compliance with dbt --- src/data_layer/diag.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 src/data_layer/diag.py diff --git a/src/data_layer/diag.py b/src/data_layer/diag.py new file mode 100644 index 0000000..722361c --- /dev/null +++ b/src/data_layer/diag.py @@ -0,0 +1,17 @@ +# This file is meant to be temporary. 
Once the changes described below are implemented, this file should be deleted. +# List of tables to be used in the project, sorted by dataset. +# The goal is to have a clear overview of the tables we are using that aren't using dbt processes, +# so that we can easily identify which tables we need to update. +# For example, stg_ -> int_ -> mart_ (or similar) for tables that are currently in stg_ +# but should be in int_ or mart_. +""" +data_meteofrance.stg_mensq_pluviometrie +data_meteofrance.stg_mensq_temperatures +data_meteofrance.histo_simu_ann +data_meteofrance.histo_simu_geo + +MENS_meteofrance.stations +MENS_meteofrance.stations_zones +MENS_meteofrance.Table_NBJTXS32_ANNEE +MENS_meteofrance.Table_sim_2100 +""" \ No newline at end of file From 19c68576422d5650c50ea65e9bb522382e429137 Mon Sep 17 00:00:00 2001 From: Nicolas Merinian Date: Fri, 20 Feb 2026 00:13:46 +0100 Subject: [PATCH 5/5] tech/dbt-add-marts: created a mapping so that all of the actual tables' names are located in one place only, and sql queries make use of aliases --- src/data_layer/bigquery.py | 74 ++++++++++++++++++++++++++------------ 1 file changed, 52 insertions(+), 22 deletions(-) diff --git a/src/data_layer/bigquery.py b/src/data_layer/bigquery.py index fcf7055..5a6c89b 100644 --- a/src/data_layer/bigquery.py +++ b/src/data_layer/bigquery.py @@ -21,24 +21,44 @@ def run_query(sql: str): client = get_bq_client() return client.query(sql).to_dataframe() +# Mapping from short table aliases to the fully-qualified BigQuery table names used in the SQL queries. 
+TABLES = { + "histo_simu_ann": "cc-reunion.data_meteofrance.histo_simu_ann", + "histo_simu_geo": "cc-reunion.data_meteofrance.histo_simu_geo", + "mensq_pluviometrie": "cc-reunion.data_meteofrance.stg_mensq_pluviometrie", + "mensq_temperatures": "cc-reunion.data_meteofrance.stg_mensq_temperatures", + "mensq_temperatures_sup_20deg": "cc-reunion.data_meteofrance.mart_mensq_temperatures_sup_20deg", + "stations": "cc-reunion.MENS_meteofrance.stations", + "stations_zones": "cc-reunion.MENS_meteofrance.stations_zones", + "Table_NBJTXS32_ANNEE": "cc-reunion.MENS_meteofrance.Table_NBJTXS32_ANNEE", + "Table_sim_2100": "cc-reunion.MENS_meteofrance.Table_sim_2100" +} + +# Cette fonction est un raccourci pour accéder aux tables définies dans le dictionnaire TABLES. +def t(table_name: str): + return TABLES[table_name] + def get_nb_moy_nuits_sup_20deg(): - return run_query(""" + table_name = t("mensq_temperatures_sup_20deg") + return run_query(f""" SELECT ANNEE, AVG(moy_nuits_ge_20) as nb_moy_nuits_sup_20deg - FROM `cc-reunion.data_meteofrance.mart_mensq_temperatures_sup_20deg` + FROM `{table_name}` GROUP BY ANNEE ORDER BY ANNEE ASC """) def get_nb_moy_nuits_sup_20deg_par_zone_par_annee(): - return run_query(""" + table_name_temp = t("mensq_temperatures") + table_name_stations_zones = t("stations_zones") + return run_query(f""" WITH nuit_par_station AS ( SELECT t.NUM_POSTE, sz.Z_GEO, t.ANNEE, SUM(t.NBJTNS20) AS nuits_ge_20_par_station - FROM `cc-reunion.data_meteofrance.stg_mensq_temperatures` t - JOIN `cc-reunion.MENS_meteofrance.stations_zones` sz + FROM `{table_name_temp}` t + JOIN `{table_name_stations_zones}` sz ON t.NUM_POSTE = sz.NUM_POSTE GROUP BY t.NUM_POSTE, sz.Z_GEO, t.ANNEE ), @@ -61,23 +81,27 @@ def get_nb_moy_nuits_sup_20deg_par_zone_par_annee(): FROM par_zone ORDER BY zone_geographique, ANNEE """) + def get_table_histo_simu(): + table_name = t("histo_simu_ann") return run_query(f""" SELECT * - FROM `cc-reunion.data_meteofrance.histo_simu_ann` + FROM `{table_name}` 
""") -def get_table(tab_name): +def get_table(table_name): return run_query(f""" SELECT * - FROM `{tab_name}` + FROM `{table_name}` """) def get_full_table_for_cyclone(): - return run_query("SELECT * FROM `cc-reunion.data_meteofrance.histo_simu_geo`") + table_name = t("histo_simu_geo") + return run_query(f"SELECT * FROM `{table_name}`") def get_table_pluie_extreme(): - return run_query(""" + table_name = t("histo_simu_geo") + return run_query(f""" WITH CTE AS ( SELECT NBJFXI3S16X, @@ -96,7 +120,7 @@ def get_table_pluie_extreme(): ELSE 'Normal' END AS Forte_pluviometrie FROM - `cc-reunion.data_meteofrance.histo_simu_geo` AS t1 + `{table_name}` AS t1 ) SELECT @@ -113,7 +137,7 @@ def get_table_pluie_extreme(): FROM CTE """) - #histo_ann +#histo_ann # CREATE OR REPLACE TABLE `data_meteofrance.histo_simu_geo` AS ( # SELECT # NUM_POSTE, @@ -134,7 +158,8 @@ def get_table_pluie_extreme(): def get_detection_precip_superieure100mm(): - return run_query(""" + table_name = t("mensq_pluviometrie") + return run_query(f""" WITH CTE AS ( SELECT ANNEE AS annee, @@ -153,7 +178,7 @@ def get_detection_precip_superieure100mm(): -- Nombre de jours > 100mm AVG(NBJRR100) AS Nb_Jours_Sup_100mm - FROM `cc-reunion.data_meteofrance.stg_mensq_pluviometrie` + FROM `{table_name}` GROUP BY ANNEE, MOIS, @@ -171,7 +196,9 @@ def get_detection_precip_superieure100mm(): # --- Requête SQL --- # La requête SQL reste inchangée, elle récupère toutes les données annuelles agrégées par zone. 
def get_annuelles_par_zone(): - return run_query(""" + table_name_annee = t("Table_NBJTXS32_ANNEE") + stations = t("stations") + return run_query(f""" WITH CTE AS ( SELECT t1.ANNEE, @@ -180,9 +207,9 @@ def get_annuelles_par_zone(): AVG(t1.total_jours_sup_32c_annuel) AS moyenne_jours_chauds_zone, COUNT(DISTINCT t1.NUM_POSTE) AS nombre_stations_incluses FROM - `cc-reunion.MENS_meteofrance.Table_NBJTXS32_ANNEE` AS t1 + `{table_name_annee}` AS t1 INNER JOIN - `cc-reunion.MENS_meteofrance.stations` AS t2 + `{stations}` AS t2 ON t1.NUM_POSTE = t2.NUM_POSTE GROUP BY t1.ANNEE, @@ -205,7 +232,10 @@ def get_annuelles_par_zone(): # --- Requête SQL (Version Statique 2100) --- # La requête a été simplifiée et corrigée pour ne calculer que la projection en 2100. def get_projection_2100(): - return run_query(""" + table_name_annee = t("Table_NBJTXS32_ANNEE") + stations = t("stations") + table_sim_2100 = t("Table_sim_2100") + return run_query(f""" WITH T_OBS_REF AS ( -- 1. CALCUL DE LA BASELINE OBSERVÉE PAR STATION (Moyenne 1991-2020) SELECT @@ -214,9 +244,9 @@ def get_projection_2100(): t2.Z_GEO, AVG(t1.total_jours_sup_32c_annuel) AS baseline_jours_chauds_ref FROM - `cc-reunion.MENS_meteofrance.Table_NBJTXS32_ANNEE` AS t1 + `{table_name_annee}` AS t1 INNER JOIN - `cc-reunion.MENS_meteofrance.stations` AS t2 + `{stations}` AS t2 ON t1.NUM_POSTE = t2.NUM_POSTE WHERE t1.ANNEE BETWEEN '1991' AND '2020' @@ -231,7 +261,7 @@ def get_projection_2100(): t2.Z_GEO, AVG(t2.NBJTXS32) AS delta_jours_chauds_moyen_2100 FROM - `cc-reunion.MENS_meteofrance.Table_sim_2100` AS t2 + `{table_sim_2100}` AS t2 WHERE EXTRACT(YEAR FROM t2.date_2100) = 2100 GROUP BY @@ -257,7 +287,7 @@ def get_projection_2100(): FROM T_PROJ_AGR AS T_PROJ INNER JOIN - `cc-reunion.MENS_meteofrance.stations` AS T_ST + `{stations}` AS T_ST ON T_PROJ.Z_GEO = T_ST.Z_GEO INNER JOIN T_OBS_REF