diff --git a/dbt_project.yml b/dbt_project.yml index 0a6c07e..8533ff9 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -1,5 +1,5 @@ name: 'projet_cc' -version: '1.0.0' +version: '1.0.1' config-version: 2 profile: 'projet_cc' @@ -13,3 +13,4 @@ clean-targets: ["target", "dbt_packages"] models: projet_cc: +materialized: view + marts: \ No newline at end of file diff --git a/models/marts/_marts_models.yml b/models/marts/_marts_models.yml new file mode 100644 index 0000000..b763723 --- /dev/null +++ b/models/marts/_marts_models.yml @@ -0,0 +1,30 @@ + +version: 2 + +models: + - name: mart_mensq_temperatures_sup_20deg + description: > + Table métier utilisée par le dashboard Streamlit. + Contient les mois avec température moyenne supérieure à 20°C. + + columns: + - name: annee + tests: + - not_null + + - name: mois + tests: + - not_null + - name: mart_mensq_pluviometrie_sup_100mm + description: > + Table métier utilisée par le dashboard Streamlit. + Contient les mois avec pluviométrie supérieure à 100mm. 
+ + columns: + - name: annee + tests: + - not_null + + - name: mois + tests: + - not_null \ No newline at end of file diff --git a/models/marts/mart_mensq_pluviometrie_sup_100mm.sql b/models/marts/mart_mensq_pluviometrie_sup_100mm.sql new file mode 100644 index 0000000..7b37acf --- /dev/null +++ b/models/marts/mart_mensq_pluviometrie_sup_100mm.sql @@ -0,0 +1,4 @@ +-- + +select * +from {{ ref('int_mensq_pluviometrie_sup_100mm') }} \ No newline at end of file diff --git a/models/marts/mart_mensq_temperatures_sup_20deg.sql b/models/marts/mart_mensq_temperatures_sup_20deg.sql new file mode 100644 index 0000000..a17cb15 --- /dev/null +++ b/models/marts/mart_mensq_temperatures_sup_20deg.sql @@ -0,0 +1,4 @@ +-- + +select * +from {{ ref('int_mensq_temperatures_sup_20deg') }} diff --git a/src/data_layer/bigquery.py b/src/data_layer/bigquery.py index 9438e0b..5a6c89b 100644 --- a/src/data_layer/bigquery.py +++ b/src/data_layer/bigquery.py @@ -20,70 +20,45 @@ def get_bq_client(): def run_query(sql: str): client = get_bq_client() return client.query(sql).to_dataframe() - -# Exemple de fonction qui fait un truc -def get_todo1(): - return run_query(f""" - WITH CTE AS ( - SELECT - ANNEE AS annee, - MOIS AS mois, - -- On garde AAAAMM pour le tri ou l'affichage chronologique - AAAAMM AS date_key, - -- Somme des précipitations pour la période donnée - AVG(RR) AS Cumul_Mensuel_Pluie_Total, - - -- 'NBRR', c'est le nombre de jours de pluie - - -- RRAB précipitation maximale tombée en 24 heures au cours du mois (Average) - AVG(RRAB) AS Cumul_MAxi_par_mois, - - -- Nombre de jours > 100mm - AVG(NBJRR100) AS Nb_Jours_Sup_100mm - - FROM `cc-reunion.data_meteofrance.stg_mensq_pluviometrie` - -- Indispensable pour fusionner les données de toutes les stations par mois - GROUP BY - ANNEE, - MOIS, - AAAAMM - - -- Tri par ordre chronologique (ce qui réglera aussi ton problème de tri dans le graph) - ORDER BY - AAAAMM ASC - ) - - SELECT * - FROM CTE - WHERE Nb_Jours_Sup_100mm > 1 - """) - -def 
get_data(): - return run_query(""" - SELECT ANNEE, moy_nuits_ge_20 - FROM `cc-reunion.data_meteofrance.int_mensq_temperatures_sup_20deg` - ORDER BY ANNEE ASC - """) +# Ce dictionnaire (TABLES) fait correspondre un nom court au nom complet de chaque table. +TABLES = { + "histo_simu_ann": "cc-reunion.data_meteofrance.histo_simu_ann", + "histo_simu_geo": "cc-reunion.data_meteofrance.histo_simu_geo", + "mensq_pluviometrie": "cc-reunion.data_meteofrance.stg_mensq_pluviometrie", + "mensq_temperatures": "cc-reunion.data_meteofrance.stg_mensq_temperatures", + "mensq_temperatures_sup_20deg": "cc-reunion.data_meteofrance.mart_mensq_temperatures_sup_20deg", + "stations": "cc-reunion.MENS_meteofrance.stations", + "stations_zones": "cc-reunion.MENS_meteofrance.stations_zones", + "Table_NBJTXS32_ANNEE": "cc-reunion.MENS_meteofrance.Table_NBJTXS32_ANNEE", + "Table_sim_2100": "cc-reunion.MENS_meteofrance.Table_sim_2100" +} + +# Cette fonction est un raccourci pour accéder aux tables définies dans le dictionnaire TABLES. 
+def t(table_name: str): + return TABLES[table_name] def get_nb_moy_nuits_sup_20deg(): - return run_query(""" + table_name = t("mensq_temperatures_sup_20deg") + return run_query(f""" SELECT ANNEE, AVG(moy_nuits_ge_20) as nb_moy_nuits_sup_20deg - FROM `cc-reunion.data_meteofrance.int_mensq_temperatures_sup_20deg` + FROM `{table_name}` GROUP BY ANNEE ORDER BY ANNEE ASC """) def get_nb_moy_nuits_sup_20deg_par_zone_par_annee(): - return run_query(""" + table_name_temp = t("mensq_temperatures") + table_name_stations_zones = t("stations_zones") + return run_query(f""" WITH nuit_par_station AS ( SELECT t.NUM_POSTE, sz.Z_GEO, t.ANNEE, SUM(t.NBJTNS20) AS nuits_ge_20_par_station - FROM `cc-reunion.data_meteofrance.stg_mensq_temperatures` t - JOIN `cc-reunion.MENS_meteofrance.stations_zones` sz + FROM `{table_name_temp}` t + JOIN `{table_name_stations_zones}` sz ON t.NUM_POSTE = sz.NUM_POSTE GROUP BY t.NUM_POSTE, sz.Z_GEO, t.ANNEE ), @@ -106,23 +81,27 @@ def get_nb_moy_nuits_sup_20deg_par_zone_par_annee(): FROM par_zone ORDER BY zone_geographique, ANNEE """) + def get_table_histo_simu(): + table_name = t("histo_simu_ann") return run_query(f""" SELECT * - FROM `cc-reunion.data_meteofrance.histo_simu_ann` + FROM `{table_name}` """) -def get_table(tab_name): +def get_table(table_name): return run_query(f""" SELECT * - FROM `{tab_name}` + FROM `{table_name}` """) def get_full_table_for_cyclone(): - return run_query("SELECT * FROM `cc-reunion.data_meteofrance.histo_simu_geo`") + table_name = t("histo_simu_geo") + return run_query(f"SELECT * FROM `{table_name}`") def get_table_pluie_extreme(): - return run_query(""" + table_name = t("histo_simu_geo") + return run_query(f""" WITH CTE AS ( SELECT NBJFXI3S16X, @@ -141,7 +120,7 @@ def get_table_pluie_extreme(): ELSE 'Normal' END AS Forte_pluviometrie FROM - `cc-reunion.data_meteofrance.histo_simu_geo` AS t1 + `{table_name}` AS t1 ) SELECT @@ -158,7 +137,7 @@ def get_table_pluie_extreme(): FROM CTE """) - #histo_ann +#histo_ann # CREATE 
OR REPLACE TABLE `data_meteofrance.histo_simu_geo` AS ( # SELECT # NUM_POSTE, @@ -179,7 +158,8 @@ def get_table_pluie_extreme(): def get_detection_precip_superieure100mm(): - return run_query(""" + table_name = t("mensq_pluviometrie") + return run_query(f""" WITH CTE AS ( SELECT ANNEE AS annee, @@ -198,7 +178,7 @@ def get_detection_precip_superieure100mm(): -- Nombre de jours > 100mm AVG(NBJRR100) AS Nb_Jours_Sup_100mm - FROM `cc-reunion.data_meteofrance.stg_mensq_pluviometrie` + FROM `{table_name}` GROUP BY ANNEE, MOIS, @@ -216,7 +196,9 @@ def get_detection_precip_superieure100mm(): # --- Requête SQL --- # La requête SQL reste inchangée, elle récupère toutes les données annuelles agrégées par zone. def get_annuelles_par_zone(): - return run_query(""" + table_name_annee = t("Table_NBJTXS32_ANNEE") + stations = t("stations") + return run_query(f""" WITH CTE AS ( SELECT t1.ANNEE, @@ -225,9 +207,9 @@ def get_annuelles_par_zone(): AVG(t1.total_jours_sup_32c_annuel) AS moyenne_jours_chauds_zone, COUNT(DISTINCT t1.NUM_POSTE) AS nombre_stations_incluses FROM - `cc-reunion.MENS_meteofrance.Table_NBJTXS32_ANNEE` AS t1 + `{table_name_annee}` AS t1 INNER JOIN - `cc-reunion.MENS_meteofrance.stations` AS t2 + `{stations}` AS t2 ON t1.NUM_POSTE = t2.NUM_POSTE GROUP BY t1.ANNEE, @@ -250,7 +232,10 @@ def get_annuelles_par_zone(): # --- Requête SQL (Version Statique 2100) --- # La requête a été simplifiée et corrigée pour ne calculer que la projection en 2100. def get_projection_2100(): - return run_query(""" + table_name_annee = t("Table_NBJTXS32_ANNEE") + stations = t("stations") + table_sim_2100 = t("Table_sim_2100") + return run_query(f""" WITH T_OBS_REF AS ( -- 1. 
CALCUL DE LA BASELINE OBSERVÉE PAR STATION (Moyenne 1991-2020) SELECT @@ -259,9 +244,9 @@ def get_projection_2100(): t2.Z_GEO, AVG(t1.total_jours_sup_32c_annuel) AS baseline_jours_chauds_ref FROM - `cc-reunion.MENS_meteofrance.Table_NBJTXS32_ANNEE` AS t1 + `{table_name_annee}` AS t1 INNER JOIN - `cc-reunion.MENS_meteofrance.stations` AS t2 + `{stations}` AS t2 ON t1.NUM_POSTE = t2.NUM_POSTE WHERE t1.ANNEE BETWEEN '1991' AND '2020' @@ -276,7 +261,7 @@ def get_projection_2100(): t2.Z_GEO, AVG(t2.NBJTXS32) AS delta_jours_chauds_moyen_2100 FROM - `cc-reunion.MENS_meteofrance.Table_sim_2100` AS t2 + `{table_sim_2100}` AS t2 WHERE EXTRACT(YEAR FROM t2.date_2100) = 2100 GROUP BY @@ -302,7 +287,7 @@ def get_projection_2100(): FROM T_PROJ_AGR AS T_PROJ INNER JOIN - `cc-reunion.MENS_meteofrance.stations` AS T_ST + `{stations}` AS T_ST ON T_PROJ.Z_GEO = T_ST.Z_GEO INNER JOIN T_OBS_REF diff --git a/src/data_layer/diag.py b/src/data_layer/diag.py new file mode 100644 index 0000000..722361c --- /dev/null +++ b/src/data_layer/diag.py @@ -0,0 +1,17 @@ +# This file is meant to be temporary. Once the changes described below are implemented, this file should be deleted. +# List of tables to be used in the project, sorted by dataset. +# The goal is to have a clear overview of the tables we use that are not managed by dbt, +# so that we can easily identify which tables we need to update. +# For example, stg_ -> int_ -> mart_ (or similar) for tables that are currently in stg_ +# but should be in int_ or mart_. +""" +data_meteofrance.stg_mensq_pluviometrie +data_meteofrance.stg_mensq_temperatures +data_meteofrance.histo_simu_ann +data_meteofrance.histo_simu_geo + +MENS_meteofrance.stations +MENS_meteofrance.stations_zones +MENS_meteofrance.Table_NBJTXS32_ANNEE +MENS_meteofrance.Table_sim_2100 +""" \ No newline at end of file