def manual_query(conexao, query, params=None):
    """Execute a SQL statement and return every fetched row as a DataFrame.

    Args:
        conexao: Open DB-API style connection exposing ``cursor()``, or
            ``None`` when the connection attempt failed.
        query: SQL text handed to ``cursor.execute``.
        params: Optional values bound to the placeholders in ``query``.
            When omitted the statement is executed exactly as before, so
            existing two-argument calls keep working.

    Returns:
        pandas.DataFrame whose rows come from ``cursor.fetchall()`` and
        whose column labels are the first item of each entry in
        ``cursor.description``.

    Raises:
        ConnectionError: If ``conexao`` is ``None``.
    """
    if conexao is None:
        raise ConnectionError("Sem conexão ativa!")

    cursor = conexao.cursor()
    try:
        if params is None:
            cursor.execute(query)
        else:
            # Let the driver bind values instead of formatting them into SQL.
            cursor.execute(query, params)

        result = cursor.fetchall()
        columns = [desc[0] for desc in cursor.description]
    finally:
        # Release the cursor even when execute/fetchall raises.
        cursor.close()

    return pd.DataFrame(result, columns=columns)
class SalesRepository:
    """Query helper for the ``data_product_sales`` table."""

    def __init__(self, connection):
        """Keep the connection that later queries will use.

        Args:
            connection: Open DB-API style connection exposing ``cursor()``,
                or ``None``; ``retrieve_data`` raises when it is ``None``.
        """
        self.connection = connection

    def retrieve_data(
        self,
        product_code=None,
        store_code=None,
        start_date=None,
        end_date=None
    ):
        """Fetch rows from ``data_product_sales`` with optional filters.

        Every filter left as ``None`` is skipped. All values are passed to
        the driver as bound parameters, never formatted into the SQL text.

        Args:
            product_code: Value matched against ``PRODUCT_CODE``.
            store_code: Value matched against ``STORE_CODE``.
            start_date: Lower bound for ``DATE`` (inclusive).
            end_date: Upper bound for ``DATE`` (inclusive).

        Returns:
            pandas.DataFrame built from ``cursor.fetchall()`` with column
            labels taken from ``cursor.description``.

        Raises:
            ConnectionError: If the repository was created without a
                connection.
        """
        if self.connection is None:
            raise ConnectionError("Sem conexão ativa!")

        # "WHERE 1=1" lets every optional filter be appended as " AND ...".
        query = """
            SELECT *
            FROM data_product_sales
            WHERE 1=1
        """

        params = []

        if product_code is not None:
            query += " AND PRODUCT_CODE = %s"
            params.append(product_code)

        if store_code is not None:
            query += " AND STORE_CODE = %s"
            params.append(store_code)

        if start_date is not None and end_date is not None:
            query += " AND DATE BETWEEN %s AND %s"
            params.extend([start_date, end_date])
        elif start_date is not None:
            # A lone bound used to be dropped silently, returning
            # unfiltered rows; apply it as an open-ended range instead.
            query += " AND DATE >= %s"
            params.append(start_date)
        elif end_date is not None:
            query += " AND DATE <= %s"
            params.append(end_date)

        print("Query:", query)
        print("Params:", params)

        cursor = self.connection.cursor()
        try:
            cursor.execute(query, params)
            result = cursor.fetchall()
            columns = [desc[0] for desc in cursor.description]
        finally:
            # Release the cursor even when execute/fetchall raises.
            cursor.close()

        return pd.DataFrame(result, columns=columns)