diff --git a/README.md b/README.md index 39b6d39..2922341 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,6 @@ export BD_DJANGO_PASSWORD="password" ```python from databasers_utils import ( TableArchitecture, - copy_models_from_dev_to_prod, get_architecture_table_from_api, ) @@ -61,9 +60,6 @@ arch.update_dbt_project() # Faz o upload das colunas para o DJango arch.upload_columns() -# Copia os modelos em dev para prod -copy_models_from_dev_to_prod(["br_ibge_ppm", "br_ibge_pam"]) - # Retorna um DataFrame da arquitetura obtida na API # Util para gerar arquitetura quando ela não está no Drive get_architecture_table_from_api("br_ms_sinasc", "microdados") diff --git a/poetry.lock b/poetry.lock index 0e1861e..d7b91c7 100644 --- a/poetry.lock +++ b/poetry.lock @@ -35,13 +35,13 @@ files = [ [[package]] name = "basedosdados" -version = "2.0.0b27" +version = "2.0.2" description = "Organizar e facilitar o acesso a dados brasileiros através de tabelas públicas no BigQuery." optional = false python-versions = "<4,>=3.9" files = [ - {file = "basedosdados-2.0.0b27-py3-none-any.whl", hash = "sha256:ad2ed3868e5a0e00835d417f6a99ae25f884c43683f2016c4e615ef8cfdef801"}, - {file = "basedosdados-2.0.0b27.tar.gz", hash = "sha256:f1b4df7248899b6757af9dab358333edc093b93ee4bef96eec1363e148225eb6"}, + {file = "basedosdados-2.0.2-py3-none-any.whl", hash = "sha256:f1f79636e1b25fd452019a32bfd9a297062a05d674b47a2a8367e158a7bdf31d"}, + {file = "basedosdados-2.0.2.tar.gz", hash = "sha256:7881b1f9218dcb17c0d2fa27da3f2dfec4acdda543ce2a02ee488402475d1bcc"}, ] [package.dependencies] @@ -52,9 +52,8 @@ google-cloud-bigquery-storage = ">=2.19,<3.0" google-cloud-storage = ">=2.9,<3.0" gql = {version = ">=3.4,<4.0", optional = true, markers = "extra == \"all\" or extra == \"upload\""} loguru = ">=0.7.0,<0.8.0" -numpy = "<2.0.0" pandas = ">=2.0,<3.0" -pandas-gbq = ">=0.19,<0.20" +pandas-gbq = ">=0.19" pandavro = {version = ">=1.7,<2.0", optional = true, markers = "extra == \"all\" or extra == \"avro\""} pydata-google-auth = ">=1.8,<2.0" requests-toolbelt = {version = ">=1,<2", optional = true, markers = "extra == \"all\" or extra == \"upload\""} @@ -1705,4 +1704,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = ">=3.9,<4" -content-hash = "b6d264b1aa361999916a7e8bf3c43e2c216e2d40887089ffbd1a3b1bcac18091" +content-hash = "3150b610aad0dc6db198f11f25373af0a70d837d7977db324981ec3970943b4f" diff --git a/pyproject.toml b/pyproject.toml index e63ee3f..1f76669 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,11 +17,11 @@ lint = "scripts.lint:main" [tool.poetry.dependencies] python = ">=3.9,<4" -basedosdados = {version = "2.0.0-beta.27", extras = ["all"]} +basedosdados = {extras = ["all"], version = "^2.0.2"} pandas = "^2.2.2" numpy = "^1.26.4" -requests = "^2.32.3" -ruamel-yaml = "^0.18.6" +requests = ">=2.26.0" +ruamel-yaml = ">=0.17.10" [tool.poetry.group.dev.dependencies] diff --git a/src/databasers_utils/__init__.py b/src/databasers_utils/__init__.py index 15a2d62..f4ea34d 100644 --- a/src/databasers_utils/__init__.py +++ b/src/databasers_utils/__init__.py @@ -1,9 +1,7 @@ -from .copy_models_from_dev_to_prod import copy_models_from_dev_to_prod from .get_architecture_table_from_api import get_architecture_table_from_api from .table_architecture import TableArchitecture __all__ = [ - "copy_models_from_dev_to_prod", "get_architecture_table_from_api", "TableArchitecture", ] diff --git a/src/databasers_utils/copy_models_from_dev_to_prod.py b/src/databasers_utils/copy_models_from_dev_to_prod.py deleted file mode 100644 index 7695fb9..0000000 --- a/src/databasers_utils/copy_models_from_dev_to_prod.py +++ /dev/null @@ -1,71 +0,0 @@ -import os -from typing import Optional -from distutils.dir_util import copy_tree -from .utils import update_dbt_project - - -def change_origin_from_dev_to_staging( - file: str, prod_models_dataset_dir: str -) -> None: - sql_file = f"{prod_models_dataset_dir}/{file}" - - with open(sql_file, "r") as io: - sql_content = io.read() - - new_sql_content = sql_content.replace( - "basedosdados-dev.", "basedosdados-staging." - ) - - with open(sql_file, "w") as io: - io.write(new_sql_content) - - -def search_model_directory_recursive(directory: str) -> Optional[str]: - # Check if 'model' is in the current directory - ls_dir = os.listdir(directory) - if "models" in ls_dir: - return os.path.join(directory, "models") - - if "queries-basedosdados-dev" in ls_dir: - return os.path.join(directory, "queries-basedosdados-dev", "models") - - # Get the parent directory - parent_directory = os.path.dirname(directory) - - # If we've reached the root directory without finding 'model', return None - if directory == parent_directory: - return None - - # Otherwise, continue searching recursively in parent directories - return search_model_directory_recursive(parent_directory) - - -def copy_models_from_dev_to_prod( - datasets: list[str], dir: str = os.getcwd() -) -> None: - dev_models_path = search_model_directory_recursive(dir) - - if dev_models_path is None: - raise Exception(f"Failed to find model directory at {dir}") - - prod_models_dir = dev_models_path.replace( - "queries-basedosdados-dev", "queries-basedosdados" - ) - - if not os.path.exists(prod_models_dir): - raise Exception( - f"Prod models directory not exists at {prod_models_dir}" - ) - - # Go to root of queries-basedosdados - root_prod = os.path.dirname(prod_models_dir) - - for dataset_id in datasets: - prod_models_dataset_dir = f"{prod_models_dir}/{dataset_id}" - copy_tree(f"{dev_models_path}/{dataset_id}", prod_models_dataset_dir) - update_dbt_project(dataset_id, dir=root_prod) - [ - change_origin_from_dev_to_staging(file, prod_models_dataset_dir) - for file in os.listdir(prod_models_dataset_dir) - if file.endswith(".sql") - ] diff --git a/src/databasers_utils/table_architecture.py b/src/databasers_utils/table_architecture.py index 293dbc9..e7fb9dc 100644 --- a/src/databasers_utils/table_architecture.py +++ b/src/databasers_utils/table_architecture.py @@ -77,7 +77,7 @@ def create_sql_files( sql_line = f" safe_cast({original_name} as {bq_type}) {column_name},\n" file.write(sql_line) - sql_last_line = f"from `basedosdados-dev.{self.dataset_id}_staging.{table_id}` as t\n" + sql_last_line = f'from {{{{ set_datalake_project("{self.dataset_id}_staging.{table_id}") }}}} as t\n' file.write(sql_last_line) print("SQL files created!") diff --git a/src/databasers_utils/utils.py b/src/databasers_utils/utils.py index c92a94d..280fc8a 100644 --- a/src/databasers_utils/utils.py +++ b/src/databasers_utils/utils.py @@ -25,10 +25,21 @@ def read_architecture_table(url: str) -> pd.DataFrame: io.StringIO( requests.get(export_url, timeout=10).content.decode("utf-8") ) - ) + ).apply(lambda x: x.str.strip() if x.dtype == object else x) df_architecture = df_architecture.loc[ - df_architecture["name"] != "(excluido)" + ~df_architecture["name"].isin( # type: ignore + [ + "(excluido)", + "excluido", + "(excluído)", + "excluído", + "removido", + "(removido)", + "deletado", + "(deletado)", + ] + ) ] return df_architecture.replace(np.nan, "", regex=True) diff --git a/tests/queries-basedosdados-dev/models/br_ibge_pib/br_ibge_pib__gini.sql b/tests/queries-basedosdados-dev/models/br_ibge_pib/br_ibge_pib__gini.sql index 74c1ff8..7116f01 100644 --- a/tests/queries-basedosdados-dev/models/br_ibge_pib/br_ibge_pib__gini.sql +++ b/tests/queries-basedosdados-dev/models/br_ibge_pib/br_ibge_pib__gini.sql @@ -8,4 +8,4 @@ select safe_cast(gini_va_industria as int64) gini_va_industria, safe_cast(gini_va_servicos as int64) gini_va_servicos, safe_cast(gini_va_adespss as int64) gini_va_adespss, -from `basedosdados-dev.br_ibge_pib_staging.gini` as t +from {{ set_datalake_project("br_ibge_pib_staging.gini") }} as t diff --git a/tests/queries-basedosdados-dev/models/br_ibge_pib/br_ibge_pib__uf.sql b/tests/queries-basedosdados-dev/models/br_ibge_pib/br_ibge_pib__uf.sql index 0af8260..700708c 100644 --- a/tests/queries-basedosdados-dev/models/br_ibge_pib/br_ibge_pib__uf.sql +++ b/tests/queries-basedosdados-dev/models/br_ibge_pib/br_ibge_pib__uf.sql @@ -10,4 +10,4 @@ select safe_cast(va_industria as int64) va_industria, safe_cast(va_servicos as int64) va_servicos, safe_cast(va_adespss as int64) va_adespss, -from `basedosdados-dev.br_ibge_pib_staging.uf` as t +from {{ set_datalake_project("br_ibge_pib_staging.uf") }} as t diff --git a/tests/queries-basedosdados-dev/test_copy_models_from_dev_to_prod.py b/tests/queries-basedosdados-dev/test_copy_models_from_dev_to_prod.py deleted file mode 100644 index 5ba5184..0000000 --- a/tests/queries-basedosdados-dev/test_copy_models_from_dev_to_prod.py +++ /dev/null @@ -1,16 +0,0 @@ -import os -import pytest -from databasers_utils import copy_models_from_dev_to_prod - -TESTS_DEV_DIR = os.path.join(os.getcwd(), "tests", "queries-basedosdados-dev") - - -@pytest.mark.dependency( - depends=[ - "test_table_architecture.test_create_yaml_file", - "test_table_architecture.test_create_sql_files", - "test_table_architecture.test_update_dbt_project", - ] -) -def test_copy_models(): - copy_models_from_dev_to_prod(datasets=["br_ibge_pib"], dir=TESTS_DEV_DIR)