-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpreprocessor.py
More file actions
60 lines (49 loc) · 2.12 KB
/
preprocessor.py
File metadata and controls
60 lines (49 loc) · 2.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import pandas as pd
import streamlit as st
# Create a function to filter data via a sidebar multiselect
def MultiSelectFilter(title, option_list):
    """Render a sidebar multiselect with a companion 'Select All' checkbox.

    Args:
        title: Label of the multiselect; also used as the checkbox's widget key.
        option_list: Options offered in the multiselect.

    Returns:
        ``option_list`` itself when the 'Select All' checkbox is ticked (it is
        ticked by default), otherwise the value returned by the multiselect.
    """
    sidebar = st.sidebar
    # Both widgets are always created, multiselect first, so they both appear
    # in the sidebar regardless of the checkbox state.
    picked = sidebar.multiselect(title, option_list)
    use_everything = sidebar.checkbox('Select All', value=True, key=title)
    return option_list if use_everything else picked
# fetch the date and time from the data
def fetch_time_data(df):
    """Add calendar and financial-year columns derived from ``df['Date']``.

    The frame is modified in place and also returned. Columns written:
    ``Date`` (converted with ``pd.to_datetime``), ``Year``, ``Day``,
    ``Month``, ``Financial_Month`` and ``Financial_Year``.

    The financial year runs April to March: April is financial month 1 and
    March is financial month 12. ``Financial_Year`` is a string of the form
    ``"2023 - 2024"``.

    Args:
        df: DataFrame with a ``Date`` column that ``pd.to_datetime`` can parse.

    Returns:
        The same DataFrame object with the extra columns added.
    """
    df['Date'] = pd.to_datetime(df['Date'])
    df['Year'] = df['Date'].dt.year
    df['Day'] = df['Date'].dt.day
    df['Month'] = df['Date'].dt.month

    # Calendar month -> position within the April-to-March financial year.
    month_dict = {4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6,
                  10: 7, 11: 8, 12: 9, 1: 10, 2: 11, 3: 12}
    df['Financial_Month'] = df['Month'].map(month_dict)

    # January-March belong to the financial year that started in the previous
    # calendar year. Computed column-wise instead of a row-wise ``apply`` so
    # it is fast and also works when the frame has no rows.
    fy_start = df['Year'] - (df['Month'] < 4).astype(int)
    df['Financial_Year'] = (
        fy_start.astype(str) + ' - ' + (fy_start + 1).astype(str)
    )
    return df
# find the top retailers
def top_revenue_retailers(df):
    """Count how many top retailers are needed to reach each revenue share.

    Revenue is the sum of ``Amount`` per ``Retailer``, ranked from highest to
    lowest. For each percentage in 100, 90, ..., 10 the result holds the
    smallest number of top-ranked retailers whose combined revenue is at
    least that percentage of the total.

    Args:
        df: DataFrame with ``Retailer`` and ``Amount`` columns.

    Returns:
        DataFrame with columns ``percentage_revenue`` and ``retailers_count``
        (one row per percentage). The count never exceeds the number of
        distinct retailers and is 0 when ``df`` has no rows.
    """
    Revenue = (
        df.groupby('Retailer')['Amount']
        .sum()
        .reset_index()
        .sort_values(by='Amount', ascending=False)
    )
    Total_Revenue = Revenue['Amount'].sum()
    # One running total replaces re-summing the prefix on every step.
    cumulative = Revenue['Amount'].cumsum().to_numpy()
    total_retailers = len(cumulative)

    percentages = [100, 90, 80, 70, 60, 50, 40, 30, 20, 10]
    retailers_count = []
    for pct in percentages:
        target_revenue = Total_Revenue * pct / 100
        # ">=" so that hitting the target exactly does not pull in one more
        # retailer than necessary.
        reached = cumulative >= target_revenue
        if reached.any():
            # argmax on a boolean array yields the index of the first True.
            retailers_count.append(int(reached.argmax()) + 1)
        else:
            # Target not reached (e.g. float rounding at 100% or empty data):
            # cap at the number of retailers that actually exist.
            retailers_count.append(total_retailers)

    retailers = pd.DataFrame({
        'percentage_revenue': percentages,
        'retailers_count': retailers_count,
    })
    return retailers
# find the top companies
def top_revenue_company(df):
    """Count how many top companies are needed to reach each revenue share.

    Revenue is the sum of ``Amount`` per ``Company``, ranked from highest to
    lowest. For each percentage in 100, 90, ..., 10 the result holds the
    smallest number of top-ranked companies whose combined revenue is at
    least that percentage of the total.

    Args:
        df: DataFrame with ``Company`` and ``Amount`` columns.

    Returns:
        DataFrame with columns ``percentage_revenue`` and ``retailers_count``
        (one row per percentage). The count never exceeds the number of
        distinct companies and is 0 when ``df`` has no rows.
    """
    Revenue = (
        df.groupby('Company')['Amount']
        .sum()
        .reset_index()
        .sort_values(by='Amount', ascending=False)
    )
    Total_Revenue = Revenue['Amount'].sum()
    # One running total replaces re-summing the prefix on every step.
    cumulative = Revenue['Amount'].cumsum().to_numpy()
    total_companies = len(cumulative)

    percentages = [100, 90, 80, 70, 60, 50, 40, 30, 20, 10]
    companies_count = []
    for pct in percentages:
        target_revenue = Total_Revenue * pct / 100
        # ">=" so that hitting the target exactly does not pull in one more
        # company than necessary.
        reached = cumulative >= target_revenue
        if reached.any():
            # argmax on a boolean array yields the index of the first True.
            companies_count.append(int(reached.argmax()) + 1)
        else:
            # Target not reached (e.g. float rounding at 100% or empty data):
            # cap at the number of companies that actually exist.
            companies_count.append(total_companies)

    # NOTE: the count column keeps the name 'retailers_count' even though it
    # holds company counts, so existing callers reading that column still work.
    companies = pd.DataFrame({
        'percentage_revenue': percentages,
        'retailers_count': companies_count,
    })
    return companies