Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/eed_webscrapping_scripts/dwd/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
"prepare_db",
"download_json_to_duckdb",
"get_config",
"read_sql_file",
"pollenflug_gefahrenindex",
]
from .db import prepare_db
Expand Down
4 changes: 4 additions & 0 deletions src/eed_webscrapping_scripts/dwd/config.yaml
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
pollenflug_gefahrenindex:
url: "https://opendata.dwd.de/climate_environment/health/alerts/s31fg.json"
db_infos:
db: "dwd"
datalake: "datalake"
git_root: ""
20 changes: 20 additions & 0 deletions src/eed_webscrapping_scripts/dwd/create_views.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from eed_webscrapping_scripts.dwd import get_config, prepare_db
from eed_webscrapping_scripts.modules import read_sql_file


def create_views():
cfg = get_config()
datalake = cfg["pollenflug_gefahrenindex"]["db_infos"]["datalake"]
sql = read_sql_file("dwd\sqls\list_available_tables.sql", cfg["git_root"])
con = prepare_db()
_tables = con.sql(sql).fetchall()
tables = [item[0] for item in _tables]
sql_union_view_base = " UNION BY NAME ".join(
[f"FROM {datalake}.{table}" for table in tables]
)
sql_union_view = f"CREATE OR REPLACE VIEW {datalake}.Pollenflug_Gefahrenindex AS ({sql_union_view_base})"
con.sql(sql_union_view)


if __name__ == "__main__":
create_views()
15 changes: 3 additions & 12 deletions src/eed_webscrapping_scripts/dwd/db.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,8 @@
import os
import duckdb
from eed_webscrapping_scripts.modules import connect_to_db

home_dir = os.path.expanduser("~")


def prepare_db():
try:
with open(os.path.join(home_dir, ".motherduck_token")) as f:
md_token = f.read()
except Exception:
md_token = os.getenv("MD_TOKEN")

con = duckdb.connect(f"md:?motherduck_token={md_token.strip()}")
def prepare_db(con=None):
con = con or connect_to_db()
con.sql("CREATE DATABASE IF NOT EXISTS dwd")
con.sql("USE dwd")
con.sql("CREATE SCHEMA IF NOT EXISTS datalake")
Expand Down
10 changes: 10 additions & 0 deletions src/eed_webscrapping_scripts/dwd/sqls/list_available_tables.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
WITH _tables AS (
SELECT table_name, TRUE AS in_dwd_db FROM duckdb_tables()
WHERE TRUE AND database_name = 'dwd'
)
, _tables_loaded AS (
SELECT table_name, TRUE AS in_loaded_tables FROM datalake.loaded_tables
)
SELECT
* REPLACE(COALESCE(in_dwd_db, FALSE) AS in_dwd_db, COALESCE(in_loaded_tables, FALSE) AS in_loaded_tables )
FROM _tables INNER JOIN _tables_loaded USING(table_name);
6 changes: 3 additions & 3 deletions src/eed_webscrapping_scripts/dwd/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,12 @@ def download_json_to_duckdb(url, con):


def get_config():
path_to_config = os.path.join(
eed_webscrapping_scripts.__path__[0], "dwd", "config.yaml"
)
git_root = eed_webscrapping_scripts.__path__[0]
path_to_config = os.path.join(git_root, "dwd", "config.yaml")

with open(path_to_config, "r") as file:
cfg = yaml.safe_load(file)
cfg["git_root"] = git_root
return cfg


Expand Down
5 changes: 5 additions & 0 deletions src/eed_webscrapping_scripts/modules/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
__all__ = [
"read_sql_file",
"connect_to_db",
]
from .modules import read_sql_file, connect_to_db
23 changes: 23 additions & 0 deletions src/eed_webscrapping_scripts/modules/modules.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import os
import duckdb

home_dir = os.path.expanduser("~")


def read_sql_file(path_to_file, git_root=None):
git_root = git_root or ""
path_to_file = os.path.join(git_root, path_to_file)
with open(path_to_file, "r") as file:
sql = file.read()
return sql


def connect_to_db():
try:
with open(os.path.join(home_dir, ".motherduck_token")) as f:
md_token = f.read()
except Exception:
md_token = os.getenv("MD_TOKEN")

con = duckdb.connect(f"md:?motherduck_token={md_token.strip()}")
return con
Loading