-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain_script.py
More file actions
67 lines (61 loc) · 3.76 KB
/
main_script.py
File metadata and controls
67 lines (61 loc) · 3.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import argparse
import time
import pandas as pd
# Raise pandas' console display limits (rows, columns, total line width) so
# that any DataFrame printed to the terminal is less likely to be truncated
# or wrapped.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
#------------------------------------------------------------------- import packages
from p_acquisition import m_acquisition as m_ac
from p_acquisition import m_cleaning as m_cl
from p_wrangling import m_wrangling as m_wr
from p_analysis import m_analysis as m_an
from p_reporting import m_reporting as m_rep
#------------------------------------------------------------------- global
# Number of top skills requested from m_an.get_df_top_skills() and
# m_rep.distribution_top_skills() in the __main__ pipeline.
NUM_TOP_SKILLS = 5
#------------------------------------------------------------------- main functions
def argument_parser(argv=None):
    """Parse the command-line options of the pipeline.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. ``None`` (the default) makes argparse read
            ``sys.argv[1:]``, which is what callers passing no argument get.

    Returns:
        argparse.Namespace: its ``country`` attribute is the list of words
        given after ``-c/--country`` (an empty list when the flag is given
        without a value), or ``None`` when the flag is omitted.
    """
    # The only option this script exposes is the country, so the description
    # talks about that rather than about a DB file or an API key.
    parser = argparse.ArgumentParser(
        description='Specify the European country to analyse...'
    )
    parser.add_argument(
        '-c', '--country',
        type=str,
        nargs='*',  # collect every word that follows the flag into a list
        help='specify an European Country to choose from...',
        required=False,
    )
    return parser.parse_args(argv)
def main(some_args):
    """Validate the country requested on the command line.

    Args:
        some_args: Value of the ``--country`` option as produced by argparse;
            it is handed to ``m_ac.list_to_string`` to obtain the country
            string that gets validated.

    Returns:
        tuple: ``(country_validated, european_countries)``.
        ``european_countries`` is the mapping returned by
        ``m_wr.get_dictEuropeanCountries()``. ``country_validated`` is the
        country string when ``m_an.country_argparse_eval`` returns that same
        string, or ``''`` when the country is ``"All"`` or empty.

    Raises:
        SystemExit: with status 1, after a 3-second pause, when the country
            is neither recognised nor ``"All"``/empty.
    """
    # Local import: `sys.exit` replaces the `exit()` helper injected by the
    # `site` module, which is not guaranteed to exist in every interpreter.
    import sys

    # Use the parameter instead of the module-level `arguments` global so the
    # function also works when it is not called from the __main__ block.
    print(f' · Parsing argument: {some_args}')
    country = m_ac.list_to_string(some_args)

    print('\t ··· Fetching European countries from web scrapping')
    european_countries = m_wr.get_dictEuropeanCountries()

    print(f'\t ··· Validating country argparse')
    known_countries = list(european_countries.values())
    if m_an.country_argparse_eval(country, known_countries) == country:
        print(f' ·· country_argument found in ddbb')
        print(f'\t\t\t\t\t >> continuing w script...')
        return country, european_countries

    if country == "All" or country == "":
        # An empty string is what the rest of the pipeline receives when no
        # single country was selected.
        print(f'\t\t >> getting all countries')
        return '', european_countries

    print(f'\t\t >> country_argument not found.')
    print(f'\t\t >> proceeding to exit')
    time.sleep(3)  # leave the message on screen for a moment before exiting
    sys.exit(1)  # non-zero status: the requested country is invalid
#-------------------------------------------------------------------
if __name__ == '__main__':
    # Start-up banner.
    print('''
<================================ PIPELINE PROJECT =================================>
<================== JOBS IN DATA BY [GENDER] + POLL [BASICINCOME] ===================>
<================================ SAMPLE YEAR = 2016 ================================>\n''')

    arguments = argument_parser()

    # Step 1: validate the requested country and fetch the country mapping.
    print('\n[1] VALIDATING ARGUMENT =================================================>')
    country, european_countries = main(arguments.country)

    # Step 2: load the raw tables. Author's note on the result:
    # ['career_info', 'country_info', 'personal_info', 'poll_info'].
    print('\n[2] ACQUIRING RAW DATA ==================================================>')
    raw_tables = m_ac.get_ddbb()

    # Step 3: clean everything except the last raw table with
    # get_all_info_tables, and the last one with acquire_poll_info.
    print('\n[3] CLEANING AND SAVING IN LOCAL ========================================>')
    info_tables = m_cl.get_all_info_tables(raw_tables[:-1], european_countries)
    poll_tables = m_cl.acquire_poll_info(raw_tables[-1])

    # Step 4: run the analyses. The results are kept in variables but are not
    # used again in this script.
    print('\n[4] DATA ANALYSIS ========================================================>')
    base_analysis = m_an.get_base_analysis_df(country, info_tables)
    job_gender_table = m_an.get_percentages_gender_by_job(base_analysis)
    top_skills_table = m_an.get_df_top_skills(country, NUM_TOP_SKILLS, info_tables)

    # Step 5: hand the cleaned tables to the reporting module.
    print('\n[5] DATA VISUALIZATION ===================================================>')
    m_rep.distribution_top_skills(country, NUM_TOP_SKILLS, info_tables, genders=['F', 'M'])
    m_rep.distribution_BI_arguments(country, info_tables, poll_tables)

    print('<================================ [[ END ]] ================================>')