# caso1.py -- Case 1: automated sales lookup (author: Gabriella Pacheco).
# The code below relies on the module-level imports of the original file:
# `import pandas as pd`, `from typing import Optional` and
# `from mysql_conexao import executa_query`.


def recupera_dados(
    product_code: Optional[int] = None,
    store_code: Optional[int] = None,
    date: Optional[list] = None,
) -> pd.DataFrame:
    """Fetch rows from ``data_product_sales``, applying only the given filters.

    Args:
        product_code: Value compared with ``PRODUCT_CODE``; ``None`` skips
            the filter.
        store_code: Value compared with ``STORE_CODE``; ``None`` skips the
            filter.
        date: ``[start, end]`` pair bound to ``DATE BETWEEN %s AND %s``;
            ``None`` skips the filter.

    Returns:
        Whatever ``executa_query`` returns for the assembled statement.

    Raises:
        ValueError: If ``date`` is given without exactly two items.
    """
    # The BETWEEN clause has exactly two placeholders; reject any other
    # length here instead of sending a mismatched parameter list to the
    # driver.
    if date is not None and len(date) != 2:
        raise ValueError(
            "date deve conter exatamente dois valores: [inicio, fim]"
        )

    # "WHERE 1=1" lets every optional filter be appended as " AND ...".
    query = "SELECT * FROM data_product_sales WHERE 1=1"
    params = []

    if product_code is not None:
        query += " AND PRODUCT_CODE = %s"
        params.append(product_code)

    if store_code is not None:
        query += " AND STORE_CODE = %s"
        params.append(store_code)

    if date is not None:
        query += " AND DATE BETWEEN %s AND %s"
        params.extend(date)

    # With no filter at all, pass None rather than an empty list.
    return executa_query(query, params or None)


# Interactive entry point: every prompt may be left blank to skip a filter.
if __name__ == "__main__":
    print("Consulta de Vendas por Produto")
    print("(Pressione Enter para ignorar um filtro)\n")

    product_input = input("Código do produto: ").strip()
    store_input = input("Código da loja: ").strip()
    date_start = input("Data início (YYYY-MM-DD): ").strip()
    date_end = input("Data fim (YYYY-MM-DD): ").strip()

    product_code = int(product_input) if product_input else None
    store_code = int(store_input) if store_input else None

    if date_start and date_end:
        date = [date_start, date_end]
    else:
        date = None
        if date_start or date_end:
            # Only one bound was typed: say so instead of silently dropping it.
            print("Filtro de data ignorado: informe a data de início e a data de fim.")

    my_data = recupera_dados(product_code, store_code, date)
    print(f"\n{len(my_data)} registro(s) encontrado(s):\n")
    print(my_data)
# caso2.py -- Case 2: average ticket per store for a period
# (author: Gabriella Pacheco).
# Relies on the module-level imports of the original file:
# `import pandas as pd` and `from mysql_conexao import executa_query`.

# Store registry (data_store_cad).
QUERY_LOJAS = """
SELECT
    STORE_CODE,
    STORE_NAME,
    START_DATE,
    END_DATE,
    BUSINESS_NAME,
    BUSINESS_CODE
FROM data_store_cad
"""

# Store sales, restricted by the statement itself to calendar year 2019.
QUERY_VENDAS = """
SELECT
    STORE_CODE,
    DATE,
    SALES_VALUE,
    SALES_QTY
FROM data_store_sales
WHERE DATE BETWEEN '2019-01-01' AND '2019-12-31'
"""


def calcula_ticket_medio(date: list) -> pd.DataFrame:
    """Compute the average ticket (TM) of each store within a period.

    TM is the store's summed ``SALES_VALUE`` divided by its summed
    ``SALES_QTY`` over the period, rounded to two decimals.

    Args:
        date: ``[start, end]`` pair; both bounds are inclusive. Sales come
            from ``QUERY_VENDAS``, which only selects 2019, so a period
            outside that year yields an empty result.

    Returns:
        DataFrame with the columns ``Loja``, ``Categoria`` and ``TM``,
        sorted by ``Loja``. ``TM`` is NaN for a store whose summed
        quantity is zero.

    Raises:
        ValueError: If ``date`` does not hold exactly two items.
    """
    # Fail before querying the database rather than with an IndexError
    # afterwards (or by silently ignoring extra items).
    if len(date) != 2:
        raise ValueError(
            "date deve conter exatamente dois valores: [inicio, fim]"
        )
    inicio, fim = date

    lojas = executa_query(QUERY_LOJAS)
    vendas = executa_query(QUERY_VENDAS)

    # Keep only the period requested by the caller.
    vendas["DATE"] = pd.to_datetime(vendas["DATE"])
    vendas = vendas[(vendas["DATE"] >= inicio) & (vendas["DATE"] <= fim)]

    vendas_agrupadas = (
        vendas.groupby("STORE_CODE")
        .agg(SALES_VALUE=("SALES_VALUE", "sum"), SALES_QTY=("SALES_QTY", "sum"))
        .reset_index()
    )

    # Attach the store name and business area to the aggregated sales.
    df = vendas_agrupadas.merge(
        lojas[["STORE_CODE", "STORE_NAME", "BUSINESS_NAME"]], on="STORE_CODE"
    )

    # A zero quantity would turn the ratio into inf; mask it so TM is NaN.
    quantidade = df["SALES_QTY"].mask(df["SALES_QTY"] == 0)
    df["TM"] = (df["SALES_VALUE"] / quantidade).round(2)

    resultado = (
        df[["STORE_NAME", "BUSINESS_NAME", "TM"]]
        .rename(columns={"STORE_NAME": "Loja", "BUSINESS_NAME": "Categoria"})
        .sort_values("Loja")
        .reset_index(drop=True)
    )

    return resultado


# caso2.py entry point: fourth quarter of 2019.
if __name__ == "__main__":
    date = ["2019-10-01", "2019-12-31"]

    df = calcula_ticket_medio(date)
    print(df.to_string(index=False))


# caso3.py -- Visualizations over IMDB_movies (author: Gabriella Pacheco).
# Relies on the module-level imports of the original file:
# `import pandas as pd`, `import matplotlib.pyplot as plt` and
# `from mysql_conexao import executa_query`.


def carrega_dados() -> pd.DataFrame:
    """Load every row of the ``IMDB_movies`` table."""
    return executa_query("SELECT * FROM IMDB_movies")


def prepara_generos(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with one row per genre listed in ``Genre``.

    ``Genre`` is split on commas, each piece gets its own row (the other
    columns are repeated) and surrounding whitespace is stripped. The input
    frame is left untouched.
    """
    separado = df.copy()
    separado["Genre"] = separado["Genre"].str.split(",")
    return separado.explode("Genre").assign(
        Genre=lambda x: x["Genre"].str.strip()
    )


def grafico_nota_por_genero(df: pd.DataFrame, ax: plt.Axes):
    """Draw a horizontal bar chart of the mean ``Rating`` per genre on ``ax``.

    A dashed red line marks the mean rating over all movies in ``df``.
    """
    media = (
        prepara_generos(df)
        .groupby("Genre")["Rating"]
        .mean()
        .sort_values()
    )

    ax.barh(media.index, media.values, color="#00b6af")
    ax.set_title("Nota Media por Genero", fontsize=13, fontweight="bold")
    ax.set_xlabel("Nota media (IMDb)")
    # "Media geral" is the mean over the movies themselves. Averaging the
    # per-genre means instead would give a one-movie genre the same weight
    # as the most common one.
    ax.axvline(
        df["Rating"].mean(),
        color="red",
        linestyle="--",
        linewidth=1,
        label="Media geral",
    )
    ax.legend()


def grafico_receita_vs_nota(df: pd.DataFrame, ax: plt.Axes):
    """Scatter ``RevenueMillions`` against ``Rating`` on ``ax``.

    Rows without a ``RevenueMillions`` value are left out of the plot.
    """
    df_limpo = df.dropna(subset=["RevenueMillions"])

    ax.scatter(
        df_limpo["Rating"], df_limpo["RevenueMillions"], alpha=0.6, color="#0075b4"
    )
    ax.set_title("Receita x Nota IMDB", fontsize=13, fontweight="bold")
    ax.set_xlabel("Nota (IMDb)")
    ax.set_ylabel("Receita (milhoes USD)")


def main():
    """Render both IMDB charts side by side and save them to caso3_imdb.png."""
    df = carrega_dados()

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
    fig.suptitle("Analise IMDB Movies", fontsize=15, fontweight="bold")

    grafico_nota_por_genero(df, ax1)
    grafico_receita_vs_nota(df, ax2)

    plt.tight_layout()
    plt.savefig("caso3_imdb.png", dpi=150, bbox_inches="tight")
    print("Grafico salvo: caso3_imdb.png")


# caso3.py entry point.
if __name__ == "__main__":
    main()
# mysql_conexao.py -- MySQL connection module (author: Gabriella Pacheco).
# Relies on the module-level imports of the original file: `import os`,
# `import logging`, `import pandas as pd`, `import mysql.connector` and
# `from dotenv import load_dotenv`.

load_dotenv()

# NOTE: this configures the root logger as a side effect of importing the
# module.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger(__name__)

# Credentials are read from the environment (populated from .env above).
DB_CONFIG = {
    "host": os.getenv("DB_HOST"),
    # `or` also covers an empty "DB_PORT=" entry, which int() would reject.
    "port": int(os.getenv("DB_PORT") or "3306"),
    "database": os.getenv("DB_NAME", "looqbox-challenge"),
    "user": os.getenv("DB_USER"),
    "password": os.getenv("DB_PASSWORD"),
}


def executa_query(sql, params=None):
    """Run ``sql`` against the configured MySQL database.

    A new connection is opened for every call and closed before returning.

    Args:
        sql: SQL statement; may contain ``%s`` placeholders.
        params: Optional values bound to the placeholders; forwarded to
            ``pandas.read_sql``.

    Returns:
        The ``pandas.DataFrame`` built by ``pandas.read_sql``.

    Raises:
        mysql.connector.Error: Raised by the driver (logged, then re-raised).
        pandas.errors.DatabaseError: The statement failed to execute
            (logged, then re-raised).
    """
    conn = None

    try:
        conn = mysql.connector.connect(**DB_CONFIG)
        logger.info("Conectado ao banco com sucesso.")
        return pd.read_sql(sql, conn, params=params)
    except mysql.connector.Error as e:
        logger.error("Erro na conexao: %s", e)
        raise
    except pd.errors.DatabaseError as e:
        # pandas reports a statement that fails to execute with its own
        # exception type, which the driver handler above does not match.
        logger.error("Erro ao executar a query: %s", e)
        raise
    finally:
        # conn stays None when connect() itself failed.
        if conn is not None and conn.is_connected():
            conn.close()
            logger.info("Conexao encerrada.")
# sql_test.py -- Answers to the challenge's SQL test
# (author: Gabriella Pacheco).
# Relies on the module-level imports of the original file:
# `import matplotlib.pyplot as plt` and
# `from mysql_conexao import executa_query`.


# Top 10 most expensive products

def produtos_mais_caros():
    """Return name and value of the 10 products with the highest PRODUCT_VAL."""
    sql = """
        SELECT
            PRODUCT_NAME,
            PRODUCT_VAL
        FROM data_product
        ORDER BY PRODUCT_VAL DESC
        LIMIT 10
    """
    return executa_query(sql)


def grafico_produtos(df):
    """Plot the products as horizontal bars and save grafico_1_produtos.png.

    Args:
        df: Frame with the columns ``PRODUCT_NAME`` and ``PRODUCT_VAL``.
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    ax.barh(df["PRODUCT_NAME"], df["PRODUCT_VAL"], color="#4C72B0")
    # Put the first row of df at the top of the chart.
    ax.invert_yaxis()
    ax.set_xlabel("Valor (R$)")
    ax.set_title("Top 10 produtos mais caros", fontweight="bold")

    # Write each value just to the right of its bar.
    for i, val in enumerate(df["PRODUCT_VAL"]):
        ax.text(val + 0.5, i, f"R$ {val:,.2f}", va="center", fontsize=9)

    plt.tight_layout()
    plt.savefig("grafico_1_produtos.png", dpi=150)
    plt.show()
    print("Gráfico salvo: grafico_1_produtos.png\n")


# Sections of the BEBIDAS and PADARIA departments

def secoes_por_departamento():
    """Return the distinct (DEP_NAME, SECTION_NAME) pairs of both departments."""
    sql = """
        SELECT DISTINCT
            DEP_NAME,
            SECTION_NAME
        FROM data_product
        WHERE DEP_NAME IN ('BEBIDAS', 'PADARIA')
        ORDER BY DEP_NAME, SECTION_NAME
    """
    return executa_query(sql)


def grafico_secoes(df):
    """Draw one panel per department listing its sections; save grafico_2_secoes.png.

    Every section is drawn as a bar of length 1: the bars only serve as a
    visual list, and the section count goes into the panel title.

    Args:
        df: Frame with the columns ``DEP_NAME`` and ``SECTION_NAME``.
    """
    fig, axes = plt.subplots(1, 2, figsize=(12, 5))
    cores = {"BEBIDAS": "#4C72B0", "PADARIA": "#DD8452"}

    # There are two panels, so at most two departments are drawn.
    for ax, (dep, grupo) in zip(axes, df.groupby("DEP_NAME")):
        secoes = grupo["SECTION_NAME"].tolist()
        ax.barh(secoes, [1] * len(secoes), color=cores.get(dep, "#888"))
        ax.set_xlim(0, 1.5)
        ax.set_xticks([])
        ax.set_title(f"{dep} ({len(secoes)} seções)", fontweight="bold")
        ax.invert_yaxis()

    plt.suptitle("Secoes por departamento", fontsize=13, fontweight="bold")
    plt.tight_layout()
    plt.savefig("grafico_2_secoes.png", dpi=150)
    plt.show()
    print("Gráfico salvo: grafico_2_secoes.png\n")


# Total sales per Business Area in Q1 2019

def vendas_por_business_area():
    """Return the summed SALES_VALUE per BUSINESS_NAME for 2019-01-01..2019-03-31."""
    sql = """
        SELECT
            sc.BUSINESS_NAME,
            SUM(ss.SALES_VALUE) AS TOTAL_VENDAS
        FROM data_store_sales ss
        JOIN data_store_cad sc ON ss.STORE_CODE = sc.STORE_CODE
        WHERE ss.DATE BETWEEN '2019-01-01' AND '2019-03-31'
        GROUP BY sc.BUSINESS_NAME
        ORDER BY TOTAL_VENDAS DESC
    """
    return executa_query(sql)


def grafico_vendas(df):
    """Plot total sales per business area as bars; save grafico_3_vendas.png.

    Args:
        df: Frame with the columns ``BUSINESS_NAME`` and ``TOTAL_VENDAS``.
    """
    cores = ["#00b6af", "#00a2bd", "#008cbf", "#0075b4", "#435c9c"]

    fig, ax = plt.subplots(figsize=(9, 5))

    bars = ax.bar(df["BUSINESS_NAME"], df["TOTAL_VENDAS"], color=cores)
    ax.set_ylabel("Total de Vendas (R$)")
    ax.set_title("Vendas por Business Area Q1 2019", fontweight="bold")
    ax.tick_params(axis="x", rotation=25)

    # Gap between a bar and its label: 1% of the tallest bar, computed once
    # instead of once per bar.
    folga = df["TOTAL_VENDAS"].max() * 0.01
    for bar, val in zip(bars, df["TOTAL_VENDAS"]):
        ax.text(
            bar.get_x() + bar.get_width() / 2,
            bar.get_height() + folga,
            f"R$ {val:,.0f}",
            ha="center", va="bottom", fontsize=8
        )

    plt.tight_layout()
    plt.savefig("grafico_3_vendas.png", dpi=150)
    plt.show()
    print("Gráfico salvo: grafico_3_vendas.png\n")


# sql_test.py entry point: run each query, print its rows, draw its chart.

if __name__ == "__main__":

    print("\n Top 10 produtos mais caros")
    df1 = produtos_mais_caros()
    print(df1.to_string(index=False))
    grafico_produtos(df1)

    print("\n Secoes de BEBIDAS e PADARIA")
    df2 = secoes_por_departamento()
    print(df2.to_string(index=False))
    grafico_secoes(df2)

    print("\n Vendas por Business Area Q1 2019 ")
    df3 = vendas_por_business_area()
    print(df3.to_string(index=False))
    grafico_vendas(df3)


# teste_conexao.py -- Quick check that the database connection works
# (author: Gabriella Pacheco).
# Relies on the module-level imports of the original file:
# `from dotenv import load_dotenv` and
# `from mysql_conexao import executa_query`.

# Load the variables from the .env file.
load_dotenv()


def test_conexao():
    """Run ``SELECT 1`` and fail if no row comes back."""
    print("Testando conexao com o banco...")
    df = executa_query("SELECT 1 AS ok")
    assert not df.empty, "Conexao falhou — DataFrame vazio."
    print("Conexao OK!\n")


def test_tabelas():
    """Print the result of ``SHOW TABLES``."""
    print("Verificando tabelas disponiveis...")
    df = executa_query("SHOW TABLES")
    print(df.to_string(index=False))
    print()


def test_query_simples():
    """Print the first three rows of ``data_product``."""
    print("Testando query na DATA_PRODUCT...")
    df = executa_query("SELECT * FROM data_product LIMIT 3")
    print(df.to_string(index=False))
    print()


# teste_conexao.py entry point: run the three checks in order.
if __name__ == "__main__":
    test_conexao()
    test_tabelas()
    test_query_simples()
    print("Tudo certo com a conexao.")