Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
venv/
__pycache__/
*.pyc
.env
Binary file added Looqbox_DataChallenge_Gabriella.pdf
Binary file not shown.
49 changes: 49 additions & 0 deletions caso1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""
Execução do caso 1 automacao em python
Autora: Gabriella Pacheco

"""

import pandas as pd
from typing import Optional
from mysql_conexao import executa_query

def recupera_dados(
    product_code: Optional[int] = None,
    store_code: Optional[int] = None,
    date: Optional[list] = None
) -> pd.DataFrame:
    """Fetch rows of ``data_product_sales`` that match the given filters.

    Every filter is optional. A filter left as ``None`` is not applied, so
    calling the function without arguments selects the whole table. Values
    are always sent as query parameters, never interpolated into the SQL.

    Args:
        product_code: Compared for equality with ``PRODUCT_CODE``.
        store_code: Compared for equality with ``STORE_CODE``.
        date: Two values ``[start, end]`` used as the bounds of
            ``DATE BETWEEN start AND end``.

    Returns:
        The result of ``executa_query`` for the assembled statement.

    Raises:
        ValueError: If ``date`` is given but does not hold exactly two values.
    """
    # Validate before touching the database: a wrong-sized range would
    # otherwise produce a placeholder/parameter mismatch inside the driver.
    periodo = None
    if date is not None:
        periodo = list(date)
        if len(periodo) != 2:
            raise ValueError(
                "date deve conter exatamente dois valores: [inicio, fim]"
            )

    # "WHERE 1=1" lets every optional filter be appended as " AND ...".
    query = "SELECT * FROM data_product_sales WHERE 1=1"
    params = []

    if product_code is not None:
        query += " AND PRODUCT_CODE = %s"
        params.append(product_code)

    if store_code is not None:
        query += " AND STORE_CODE = %s"
        params.append(store_code)

    if periodo is not None:
        query += " AND DATE BETWEEN %s AND %s"
        params.extend(periodo)

    # An empty parameter list is passed as None, as the original code did.
    return executa_query(query, params or None)

# Run the interactive query when the module is executed as a script.
if __name__ == "__main__":
    print("Consulta de Vendas por Produto")
    print("(Pressione Enter para ignorar um filtro)\n")

    product_input = input("Código do produto: ").strip()
    store_input = input("Código da loja: ").strip()
    date_start = input("Data início (YYYY-MM-DD): ").strip()
    date_end = input("Data fim (YYYY-MM-DD): ").strip()

    # int() rejects non-numeric text; exit with a readable message instead
    # of an unhandled traceback.
    try:
        product_code = int(product_input) if product_input else None
        store_code = int(store_input) if store_input else None
    except ValueError:
        raise SystemExit(
            "Erro: os códigos de produto e loja devem ser números inteiros."
        ) from None

    # The date filter needs both bounds. Warn when only one was typed rather
    # than silently running the query without the filter.
    if bool(date_start) != bool(date_end):
        print("Aviso: informe as duas datas para filtrar por período; "
              "filtro de data ignorado.")
    date = [date_start, date_end] if date_start and date_end else None

    my_data = recupera_dados(product_code, store_code, date)
    print(f"\n{len(my_data)} registro(s) encontrado(s):\n")
    print(my_data)
68 changes: 68 additions & 0 deletions caso2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
"""
Execução do caso 2 filtro por periodo
Autora: Gabriella Pacheco
"""

import pandas as pd
from mysql_conexao import executa_query


# Store registry query: reads STORE_CODE, STORE_NAME, START_DATE, END_DATE,
# BUSINESS_NAME and BUSINESS_CODE from data_store_cad, with no filter.
QUERY_LOJAS = """
SELECT
STORE_CODE,
STORE_NAME,
START_DATE,
END_DATE,
BUSINESS_NAME,
BUSINESS_CODE
FROM data_store_cad
"""

# Store sales query: reads STORE_CODE, DATE, SALES_VALUE and SALES_QTY from
# data_store_sales. The BETWEEN bounds are hard-coded, so only rows dated
# within 2019 are fetched.
QUERY_VENDAS = """
SELECT
STORE_CODE,
DATE,
SALES_VALUE,
SALES_QTY
FROM data_store_sales
WHERE DATE BETWEEN '2019-01-01' AND '2019-12-31'
"""

def calcula_ticket_medio(date: list) -> pd.DataFrame:
    """Compute the average ticket (TM) per store for a period.

    Sales fetched with ``QUERY_VENDAS`` are restricted to ``date``, summed
    per ``STORE_CODE``, joined with the store registry from ``QUERY_LOJAS``
    and divided (total value / total quantity).

    Note: ``QUERY_VENDAS`` only selects rows dated within 2019, so any part
    of the requested period outside 2019 contributes no sales.

    Args:
        date: Two values ``[start, end]``; both bounds are inclusive. Each
            value must be accepted by ``pandas.to_datetime``.

    Returns:
        DataFrame with columns ``Loja``, ``Categoria`` and ``TM`` (rounded
        to two decimals), sorted by ``Loja``. Stores whose summed quantity
        is zero get a missing ``TM`` instead of ``inf``.

    Raises:
        ValueError: If ``date`` does not hold exactly two values.
    """
    # Fail fast, before any database round trip, on a malformed period.
    if len(date) != 2:
        raise ValueError(
            "date deve conter exatamente dois valores: [inicio, fim]"
        )
    inicio, fim = pd.to_datetime(date[0]), pd.to_datetime(date[1])

    lojas = executa_query(QUERY_LOJAS)
    vendas = executa_query(QUERY_VENDAS)

    # Keep only the period requested by the client (inclusive on both ends).
    vendas["DATE"] = pd.to_datetime(vendas["DATE"])
    vendas = vendas[vendas["DATE"].between(inicio, fim)]

    vendas_agrupadas = (
        vendas.groupby("STORE_CODE")
        .agg(SALES_VALUE=("SALES_VALUE", "sum"), SALES_QTY=("SALES_QTY", "sum"))
        .reset_index()
    )

    # Attach store name and business area. merge defaults to an inner join,
    # so stores missing from the registry are dropped.
    df = vendas_agrupadas.merge(
        lojas[["STORE_CODE", "STORE_NAME", "BUSINESS_NAME"]], on="STORE_CODE"
    )

    # Average ticket. Zero quantities become missing first, so the division
    # yields a missing value rather than +/-inf.
    quantidade = df["SALES_QTY"].where(df["SALES_QTY"] != 0)
    df["TM"] = (df["SALES_VALUE"] / quantidade).round(2)

    resultado = (
        df[["STORE_NAME", "BUSINESS_NAME", "TM"]]
        .rename(columns={"STORE_NAME": "Loja", "BUSINESS_NAME": "Categoria"})
        .sort_values("Loja")
        .reset_index(drop=True)
    )

    return resultado

if __name__ == "__main__":
    # Fixed example period: 1 October to 31 December 2019.
    periodo = ["2019-10-01", "2019-12-31"]

    tabela = calcula_ticket_medio(periodo)
    # Print the table without the positional index column.
    print(tabela.to_string(index=False))
60 changes: 60 additions & 0 deletions caso3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
"""
Visualizacoes com IMDB_movies
Autora: Gabriella Pacheco
"""

import pandas as pd
import matplotlib.pyplot as plt
from mysql_conexao import executa_query


def carrega_dados() -> pd.DataFrame:
    """Fetch every row and column of ``IMDB_movies`` via ``executa_query``."""
    consulta = "SELECT * FROM IMDB_movies"
    filmes = executa_query(consulta)
    return filmes


def prepara_generos(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with one row per (film, genre) pair.

    The comma-separated ``Genre`` text is split, each piece gets its own row
    (the other columns and the index label are repeated), and surrounding
    whitespace is stripped from every genre. The input frame is not modified.
    """
    generos_como_lista = df["Genre"].str.split(",")
    # assign() works on a copy, so the caller's frame keeps its original text.
    uma_linha_por_genero = df.assign(Genre=generos_como_lista).explode("Genre")
    return uma_linha_por_genero.assign(
        Genre=uma_linha_por_genero["Genre"].str.strip()
    )


def grafico_nota_por_genero(df: pd.DataFrame, ax: plt.Axes):
    """Draw on *ax* a horizontal bar chart of the mean ``Rating`` per genre.

    Genres come from ``prepara_generos`` and bars are ordered by ascending
    mean. A dashed red line marks the mean of the per-genre means (not the
    mean over all films).
    """
    notas_por_genero = prepara_generos(df).groupby("Genre")["Rating"]
    nota_media = notas_por_genero.mean().sort_values()

    ax.barh(nota_media.index, nota_media.values, color="#00b6af")
    # Reference line: average of the bar values plotted above.
    ax.axvline(
        nota_media.mean(),
        color="red",
        linestyle="--",
        linewidth=1,
        label="Media geral",
    )

    ax.set_title("Nota Media por Genero", fontsize=13, fontweight="bold")
    ax.set_xlabel("Nota media (IMDb)")
    ax.legend()


def grafico_receita_vs_nota(df: pd.DataFrame, ax: plt.Axes):
    """Draw on *ax* a scatter plot of ``RevenueMillions`` against ``Rating``.

    Rows with a missing ``RevenueMillions`` are left out of the plot.
    """
    tem_receita = df["RevenueMillions"].notna()
    com_receita = df[tem_receita]

    notas = com_receita["Rating"]
    receitas = com_receita["RevenueMillions"]
    ax.scatter(notas, receitas, alpha=0.6, color="#0075b4")

    ax.set_title("Receita x Nota IMDB", fontsize=13, fontweight="bold")
    ax.set_xlabel("Nota (IMDb)")
    ax.set_ylabel("Receita (milhoes USD)")


def main():
    """Load the IMDB data, draw both charts side by side and save the PNG."""
    filmes = carrega_dados()

    # One row, two panels: genre ratings on the left, revenue on the right.
    figura, eixos = plt.subplots(1, 2, figsize=(14, 6))
    eixo_generos, eixo_receita = eixos
    figura.suptitle("Analise IMDB Movies", fontsize=15, fontweight="bold")

    grafico_nota_por_genero(filmes, eixo_generos)
    grafico_receita_vs_nota(filmes, eixo_receita)

    plt.tight_layout()
    plt.savefig("caso3_imdb.png", dpi=150, bbox_inches="tight")
    print("Grafico salvo: caso3_imdb.png")


# Build and save the charts only when the module is run as a script.
if __name__ == "__main__":
    main()
Binary file added caso3_imdb.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added grafico_1_produtos.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added grafico_2_secoes.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added grafico_3_vendas.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
41 changes: 41 additions & 0 deletions mysql_conexao.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""
Módulo de conexão com MySQL.
Autora: Gabriella Pacheco
"""

import os
import logging
import pandas as pd
import mysql.connector
from dotenv import load_dotenv

# Make the variables of a local .env file available through the environment.
load_dotenv()

logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Connection settings taken from the environment. Port and database name
# fall back to defaults; host, user and password are None when unset.
DB_CONFIG = dict(
    host=os.environ.get("DB_HOST"),
    port=int(os.environ.get("DB_PORT", "3306")),
    database=os.environ.get("DB_NAME", "looqbox-challenge"),
    user=os.environ.get("DB_USER"),
    password=os.environ.get("DB_PASSWORD"),
)

# Run a query against the database and return the result as a DataFrame.
def executa_query(sql, params=None) -> pd.DataFrame:
    """Open a connection, run *sql* through ``pd.read_sql`` and close it.

    Args:
        sql: SQL statement to execute.
        params: Optional values for the statement's placeholders, forwarded
            unchanged to ``pd.read_sql``.

    Returns:
        The DataFrame produced by ``pd.read_sql``.

    Raises:
        mysql.connector.Error: Raised by the driver (e.g. while connecting);
            logged and re-raised.
        pandas.errors.DatabaseError: Raised by pandas when executing the
            statement fails; logged and re-raised.
    """
    conn = None

    try:
        conn = mysql.connector.connect(**DB_CONFIG)
        logger.info("Conectado ao banco com sucesso.")
        return pd.read_sql(sql, conn, params=params)
    except mysql.connector.Error as e:
        logger.error("Erro na conexao: %s", e)
        raise
    except pd.errors.DatabaseError as e:
        # pandas wraps failures raised while executing the statement in its
        # own DatabaseError, which the handler above does not match; without
        # this clause such failures would propagate unlogged.
        logger.error("Erro ao executar a query: %s", e)
        raise
    finally:
        # Runs on success and on failure alike; conn is still None when
        # connect() itself raised.
        if conn and conn.is_connected():
            conn.close()
            logger.info("Conexao encerrada.")
5 changes: 5 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
mysql-connector-python==8.3.0
pandas==2.2.1
matplotlib==3.8.3
python-dotenv==1.0.1
jupyter==1.0.0
133 changes: 133 additions & 0 deletions sql_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
"""
Answers for the challenge's SQL Test.
Autora: Gabriella Pacheco
"""

import matplotlib.pyplot as plt
from mysql_conexao import executa_query


# Top 10 produtos mais caros

def produtos_mais_caros():
    """Return name and value of the ten rows of ``data_product`` with the
    highest ``PRODUCT_VAL``, in descending order of value.
    """
    consulta = """
SELECT
PRODUCT_NAME,
PRODUCT_VAL
FROM data_product
ORDER BY PRODUCT_VAL DESC
LIMIT 10
"""
    top_10 = executa_query(consulta)
    return top_10


def grafico_produtos(df):
    """Plot ``PRODUCT_VAL`` per ``PRODUCT_NAME`` as horizontal bars.

    The first row of *df* is drawn at the top. The figure is saved as
    ``grafico_1_produtos.png`` and then shown.
    """
    nomes = df["PRODUCT_NAME"]
    valores = df["PRODUCT_VAL"]

    _, eixo = plt.subplots(figsize=(10, 6))
    eixo.barh(nomes, valores, color="#4C72B0")
    # barh places the first bar at the bottom; flip so row 0 is on top.
    eixo.invert_yaxis()
    eixo.set_title("Top 10 produtos mais caros", fontweight="bold")
    eixo.set_xlabel("Valor (R$)")

    # Write each value slightly to the right of the end of its bar.
    for posicao, valor in enumerate(valores):
        eixo.text(valor + 0.5, posicao, f"R$ {valor:,.2f}", va="center", fontsize=9)

    plt.tight_layout()
    plt.savefig("grafico_1_produtos.png", dpi=150)
    plt.show()
    print("Gráfico salvo: grafico_1_produtos.png\n")


# Seções dos departamentos BEBIDAS e PADARIA

def secoes_por_departamento():
    """Return the distinct (``DEP_NAME``, ``SECTION_NAME``) pairs of
    ``data_product`` for the BEBIDAS and PADARIA departments, sorted by
    department and then by section.
    """
    consulta = """
SELECT DISTINCT
DEP_NAME,
SECTION_NAME
FROM data_product
WHERE DEP_NAME IN ('BEBIDAS', 'PADARIA')
ORDER BY DEP_NAME, SECTION_NAME
"""
    secoes = executa_query(consulta)
    return secoes


def grafico_secoes(df):
    """Plot, one panel per department, the sections listed in *df*.

    Each section is drawn as a bar of fixed length 1, so the chart works as
    a labelled list. The panel title carries the number of sections. The
    figure is saved as ``grafico_2_secoes.png`` and then shown.

    Args:
        df: Frame with ``DEP_NAME`` and ``SECTION_NAME`` columns.
    """
    cores = {"BEBIDAS": "#4C72B0", "PADARIA": "#DD8452"}
    grupos = list(df.groupby("DEP_NAME"))

    # One panel per department actually present, instead of a fixed pair
    # that would truncate extra departments or leave a panel empty.
    # squeeze=False keeps `axes` two-dimensional even for a single panel.
    fig, axes = plt.subplots(
        1, max(len(grupos), 1), figsize=(12, 5), squeeze=False
    )

    for ax, (dep, grupo) in zip(axes[0], grupos):
        secoes = grupo["SECTION_NAME"].tolist()
        # Departments without a configured colour fall back to grey.
        ax.barh(secoes, [1] * len(secoes), color=cores.get(dep, "#888"))
        ax.set_xlim(0, 1.5)
        ax.set_xticks([])  # bar length carries no meaning, hide the scale
        ax.set_title(f"{dep} ({len(secoes)} seções)", fontweight="bold")
        ax.invert_yaxis()

    plt.suptitle("Secoes por departamento", fontsize=13, fontweight="bold")
    plt.tight_layout()
    plt.savefig("grafico_2_secoes.png", dpi=150)
    plt.show()
    print("Gráfico salvo: grafico_2_secoes.png\n")


# Total de vendas por Business Area no Q1 2019

def vendas_por_business_area():
    """Return the summed ``SALES_VALUE`` per ``BUSINESS_NAME`` for sales
    dated between 2019-01-01 and 2019-03-31, largest total first.

    Sales rows are joined to the store registry on ``STORE_CODE``.
    """
    consulta = """
SELECT
sc.BUSINESS_NAME,
SUM(ss.SALES_VALUE) AS TOTAL_VENDAS
FROM data_store_sales ss
JOIN data_store_cad sc ON ss.STORE_CODE = sc.STORE_CODE
WHERE ss.DATE BETWEEN '2019-01-01' AND '2019-03-31'
GROUP BY sc.BUSINESS_NAME
ORDER BY TOTAL_VENDAS DESC
"""
    totais = executa_query(consulta)
    return totais


def grafico_vendas(df):
    """Plot ``TOTAL_VENDAS`` per ``BUSINESS_NAME`` as labelled vertical bars.

    Each bar gets its value written above it. The figure is saved as
    ``grafico_3_vendas.png`` and then shown.

    Args:
        df: Frame with ``BUSINESS_NAME`` and ``TOTAL_VENDAS`` columns.
    """
    cores = ["#00b6af", "#00a2bd", "#008cbf", "#0075b4", "#435c9c"]
    totais = df["TOTAL_VENDAS"]

    fig, ax = plt.subplots(figsize=(9, 5))

    bars = ax.bar(df["BUSINESS_NAME"], totais, color=cores)
    ax.set_ylabel("Total de Vendas (R$)")
    ax.set_title("Vendas por Business Area Q1 2019", fontweight="bold")
    ax.tick_params(axis="x", rotation=25)

    # Gap between a bar's top and its label: 1% of the tallest bar.
    # Computed once instead of once per bar; unused when there are no bars.
    folga = totais.max() * 0.01

    for bar, val in zip(bars, totais):
        ax.text(
            bar.get_x() + bar.get_width() / 2,
            bar.get_height() + folga,
            f"R$ {val:,.0f}",
            ha="center", va="bottom", fontsize=8
        )

    plt.tight_layout()
    plt.savefig("grafico_3_vendas.png", dpi=150)
    plt.show()
    print("Gráfico salvo: grafico_3_vendas.png\n")


# Execução

if __name__ == "__main__":
    # Each section prints a heading, runs its query, prints the table
    # without the index column, and draws the matching chart.

    print("\n Top 10 produtos mais caros")
    df1 = produtos_mais_caros()
    print(df1.to_string(index=False))
    grafico_produtos(df1)

    # Heading typo fixed ("Secoess" -> "Secoes").
    print("\n Secoes de BEBIDAS e PADARIA")
    df2 = secoes_por_departamento()
    print(df2.to_string(index=False))
    grafico_secoes(df2)

    print("\n Vendas por Business Area Q1 2019 ")
    df3 = vendas_por_business_area()
    print(df3.to_string(index=False))
    grafico_vendas(df3)
Loading