diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index b4b7209..a93ac16 100644 Binary files a/__pycache__/__init__.cpython-36.pyc and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/data/scrapeddata.csv b/data/scrapeddata.csv new file mode 100644 index 0000000..eaa25bd --- /dev/null +++ b/data/scrapeddata.csv @@ -0,0 +1,78 @@ +,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14 +11,United States of America,Federal state,".mw-parser-output .monospaced{font-family:monospace,monospace}USUSA840",US,00,,,U.S.,U.S.,U.S.A.,,,,, +12,Alabama,State,US-AL,AL,01,AL,AL,Ala.,Ala.,,,,,, +13,Alaska,State,US-AK,AK,02,AK,AK,Alaska,Alaska,Alas.,,,,, +14,Arizona,State,US-AZ,AZ,04,AZ,AZ,Ariz.,Ariz.,Az.,,,,, +15,Arkansas,State,US-AR,AR,05,AR,AR,Ark.,Ark.,,,,,, +16,California,State,US-CA,CA,06,CA,CF,Calif.,Calif.,"Ca., Cal.",,,,, +17,Colorado,State,US-CO,CO,08,CO,CL,Colo.,Colo.,Col.,,,,, +18,Connecticut,State,US-CT,CT,09,CT,CT,Conn.,Conn.,Ct.,,,,, +19,Delaware,State,US-DE,DE,10,DE,DL,Del.,Del.,De.,,,,, +20,District of Columbia,Federal district,US-DC,DC,11,DC,DC,D.C.,D.C.,Wash. 
D.C.,,,,, +21,Florida,State,US-FL,FL,12,FL,FL,Fla.,Fla.,"Fl., Flor.",,,,, +22,Georgia,State,US-GA,GA,13,GA,GA,Ga.,Ga.,Geo.,,,,, +23,Hawaii,State,US-HI,HI,15,HI,HA,Hawaii,Hawaii,H.I.,,,,, +24,Idaho,State,US-ID,ID,16,ID,ID,Idaho,Idaho,"Id., Ida.",,,,, +25,Illinois,State,US-IL,IL,17,IL,IL,Ill.,Ill.,"Il., Ills., Ill's",,,,, +26,Indiana,State,US-IN,IN,18,IN,IN,Ind.,Ind.,In.,,,,, +27,Iowa,State,US-IA,IA,19,IA,IA,Iowa,Iowa,"Ia., Ioa.[1]",,,,, +28,Kansas,State,US-KS,KS,20,KS,KA,Kans.,Kan.,"Ks., Ka.",,,,, +29,Kentucky,State (Commonwealth),US-KY,KY,21,KY,KY,Ky.,Ky.,"Ken., Kent.",,,,, +30,Louisiana,State,US-LA,LA,22,LA,LA,La.,La.,,,,,, +31,Maine,State,US-ME,ME,23,ME,ME,Maine,Maine,Me.,,,,, +32,Maryland,State,US-MD,MD,24,MD,MD,Md.,Md.,,,,,, +33,Massachusetts,State (Commonwealth),US-MA,MA,25,MA,MS,Mass.,Mass.,,,,,, +34,Michigan,State,US-MI,MI,26,MI,MC,Mich.,Mich.,,,,,, +35,Minnesota,State,US-MN,MN,27,MN,MN,Minn.,Minn.,Mn.,,,,, +36,Mississippi,State,US-MS,MS,28,MS,MI,Miss.,Miss.,,,,,, +37,Missouri,State,US-MO,MO,29,MO,MO,Mo.,Mo.,,,,,, +38,Montana,State,US-MT,MT,30,MT,MT,Mont.,Mont.,,,,,, +39,Nebraska,State,US-NE,NE,31,NE,NB,Nebr.,Neb.,,,,,, +40,Nevada,State,US-NV,NV,32,NV,NV,Nev.,Nev.,Nv.,,,,, +41,New Hampshire,State,US-NH,NH,33,NH,NH,N.H.,N.H.,,,,,, +42,New Jersey,State,US-NJ,NJ,34,NJ,NJ,N.J.,N.J.,N.Jersey,,,,, +43,New Mexico,State,US-NM,NM,35,NM,NM,N. Mex.,N.M.,New M.,,,,, +44,New York,State,US-NY,NY,36,NY,NY,N.Y.,N.Y.,N. York,,,,, +45,North Carolina,State,US-NC,NC,37,NC,NC,N.C.,N.C.,N. Car.,,,,, +46,North Dakota,State,US-ND,ND,38,ND,ND,N. Dak.,N.D.,NoDak,,,,, +47,Ohio,State,US-OH,OH,39,OH,OH,Ohio,Ohio,"O., Oh.",,,,, +48,Oklahoma,State,US-OK,OK,40,OK,OK,Okla.,Okla.,Ok.,,,,, +49,Oregon,State,US-OR,OR,41,OR,OR,Oreg.,Ore.,Or.,,,,, +50,Pennsylvania,State (Commonwealth),US-PA,PA,42,PA,PA,Pa.,Pa.,"Penn., Penna.",,,,, +51,Rhode Island,State,US-RI,RI,44,RI,RI,R.I.,R.I.,"R.I. & P.P., R. Isl.",,,,, +52,South Carolina,State,US-SC,SC,45,SC,SC,S.C.,S.C.,S. 
Car.,,,,, +53,South Dakota,State,US-SD,SD,46,SD,SD,S. Dak.,S.D.,SoDak,,,,, +54,Tennessee,State,US-TN,TN,47,TN,TN,Tenn.,Tenn.,,,,,, +55,Texas,State,US-TX,TX,48,TX,TX,Tex.,Texas,Tx.,,,,, +56,Utah,State,US-UT,UT,49,UT,UT,Utah,Utah,Ut.,,,,, +57,Vermont,State,US-VT,VT,50,VT,VT,Vt.,Vt.,,,,,, +58,Virginia,State (Commonwealth),US-VA,VA,51,VA,VA,Va.,Va.,Virg.,,,,, +59,Washington,State,US-WA,WA,53,WA,WN,Wash.,Wash.,"Wa., Wn.[2]",,,,, +60,West Virginia,State,US-WV,WV,54,WV,WV,W. Va.,W.Va.,"W.V., W. Virg.",,,,, +61,Wisconsin,State,US-WI,WI,55,WI,WS,Wis.,Wis.,"Wi., Wisc.",,,,, +62,Wyoming,State,US-WY,WY,56,WY,WY,Wyo.,Wyo.,Wy.,,,,, +63,American Samoa,Insular area (Territory),ASASM016US-AS,AS,60,AS,AS,A.S.,,,,,,, +64,Guam,Insular area (Territory),GUGUM316US-GU,GU,66,GU,GU,Guam,,,,,,, +65,Northern Mariana Islands,Insular area (Commonwealth),MPMNP580US-MP,MP,69,MP,CM,M.P.,,CNMI[3],,,,, +66,Puerto Rico,Insular area (Territory),PRPRI630US-PR,PR,72,PR,PR,P.R.,,,,,,, +67,U.S. Virgin Islands,Insular area (Territory),VIVIR850US-VI,VI,78,VI,VI,V.I.,,U.S.V.I.,,,,, +68,U.S. Minor Outlying Islands,Insular areas,UMUMI581US-UM,UM,74,,,,,,,,,, +69,Baker Island,island,UM-81,,81,,,,,XB[4],,,,, +70,Howland Island,island,UM-84,,84,,,,,XH[4],,,,, +71,Jarvis Island,island,UM-86,,86,,,,,XQ[4],,,,, +72,Johnston Atoll,atoll,UM-67,,67,,,,,XU[4],,,,, +73,Kingman Reef,atoll,UM-89,,89,,,,,XM[4],,,,, +74,Midway Islands,atoll,UM-71,,71,,,,,QM[4],,,,, +75,Navassa Island,island,UM-76,,76,,,,,XV[4],,,,, +76,Palmyra Atoll[5],atoll[5],UM-95,,95,,,,,XL[4],,,,, +77,Wake Island,atoll,UM-79,,79,,,,,QW[4],,,,, +78,Micronesia,Freely associated state,FMFSM583,FM,64,FM,,,,,,,,, +79,Marshall Islands,Freely associated state,MHMHL584,MH,68,MH,,,,,,,,, +80,Palau,Freely associated state,PWPLW585,PW,70,PW,,,,,,,,, +81,U.S. Armed Forces – Americas[6],US military mail code,,,,AA,,,,,,,,, +82,U.S. Armed Forces – Europe[7],US military mail code,,,,AE,,,,,,,,, +83,U.S. 
def q01_load_data(path, months=('Jan', 'Feb', 'Mar')):
    """Load the sales workbook, lower-case the states and add a row total.

    Args:
        path: Location of the Excel workbook. It must contain a ``state``
            column and every column named in ``months``.
        months: Names of the columns that are added together to form the
            ``total`` column. Defaults to ``Jan``, ``Feb`` and ``Mar``.

    Returns:
        pandas.DataFrame: the workbook contents with ``state`` lower-cased
        and a new ``total`` column holding the per-row sum of ``months``.

    Raises:
        ValueError: if ``months`` is empty.
        KeyError: if ``state`` or one of ``months`` is not a column.
    """
    month_columns = list(months)
    if not month_columns:
        raise ValueError('months must name at least one column')

    df = pd.read_excel(path)
    df['state'] = df['state'].str.lower()

    # Plain element-wise addition (not DataFrame.sum) so that a missing value
    # in any month still yields a missing total, exactly as `a + b + c` would.
    total = df[month_columns[0]]
    for column in month_columns[1:]:
        total = total + df[column]
    df['total'] = total
    return df
def q02_append_row(path):
    """Return the loaded sales data with one extra row of column totals.

    The frame produced by ``q01_load_data(path)`` is extended by a final row
    holding the sums of the ``Jan``, ``Feb``, ``Mar`` and ``total`` columns.
    Every other column is left missing (NaN) in that row.

    Args:
        path: Location of the Excel workbook, passed to ``q01_load_data``.

    Returns:
        pandas.DataFrame: the original rows followed by the totals row, with
        a fresh 0..n index.
    """
    df = q01_load_data(path)
    summed_columns = ['Jan', 'Feb', 'Mar', 'total']

    # One-row frame carrying only the summed columns; concat aligns on column
    # names and fills the remaining columns of that row with NaN.
    totals_row = pd.DataFrame(
        {column: [df[column].sum()] for column in summed_columns}
    )

    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # way to stack frames and works on older releases as well.
    return pd.concat([df, totals_row], ignore_index=True)
def q03_scrape_clean(url, timeout=30):
    """Download a page, keep a slice of its first HTML table and save it.

    The first table found on the page is cut down to positional rows 11 up
    to (but excluding) the last one, written to ``./data/scrapeddata.csv``
    and returned.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        pandas.DataFrame: the sliced table (original row labels kept).

    Raises:
        requests.exceptions.RequestException: on connection problems,
            timeouts or a non-success HTTP status.
        ValueError: if pandas finds no table in the page.
    """
    # Local import so the block does not rely on a new file-level import.
    from io import StringIO

    # Without a timeout a stalled server would block this call forever.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page for tables.
    response.raise_for_status()

    # read_html expects a file-like object; passing literal HTML text is
    # deprecated in recent pandas releases.
    tables = pd.read_html(StringIO(response.text))

    # NOTE(review): the 11/-1 bounds are hard-coded for the page layout this
    # was written against — presumably they skip header rows and a footer;
    # confirm whenever the source page changes.
    df = tables[0].iloc[11:-1]

    # NOTE(review): other steps in this project read './data/scraped.csv';
    # confirm that this differently named output file is intended.
    df.to_csv('./data/scrapeddata.csv')
    return df
def q04_mapping(path1, path2, name_col='United States of America',
                abbr_col='US'):
    """Add an ``abbr`` column of state abbreviations to the sales data.

    A name -> abbreviation lookup is built from two columns of the CSV at
    ``path2`` and applied to the ``state`` column of the workbook at
    ``path1``. States without a matching name get NaN.

    Args:
        path1: Location of the Excel workbook; needs a ``state`` column and
            at least six columns in total.
        path2: Location of the CSV that holds the names and abbreviations.
        name_col: CSV column holding the full state names.
        abbr_col: CSV column holding the abbreviations.
            NOTE(review): the default header names look like the first data
            row of the scraped table promoted to a header — confirm against
            the CSV actually used.

    Returns:
        pandas.DataFrame: the workbook contents with ``abbr`` inserted as
        the seventh column (position 6).
    """
    scraped = pd.read_csv(path2)
    # On duplicate names the last abbreviation wins, as with dict(zip(...)).
    mapping = dict(zip(scraped[name_col], scraped[abbr_col]))

    df = pd.read_excel(path1)
    # Series.map looks every state up in the dict and yields NaN on a miss,
    # which replaces the hand-written membership loop.
    df.insert(6, 'abbr', df['state'].map(mapping))
    return df
def q04_mapping(path1, path2, name_col='United States of America',
                abbr_col='US'):
    """Add an ``abbr`` column of state abbreviations to the sales data.

    Args:
        path1: Excel workbook with a ``state`` column and at least six
            columns in total.
        path2: CSV holding full state names and their abbreviations.
        name_col: CSV column with the full state names.
        abbr_col: CSV column with the abbreviations.

    Returns:
        pandas.DataFrame: the workbook contents with ``abbr`` inserted at
        column position 6; states without a match get NaN.
    """
    scraped = pd.read_csv(path2)
    mapping = dict(zip(scraped[name_col], scraped[abbr_col]))

    df = pd.read_excel(path1)
    # Series.map yields NaN for any state missing from the lookup.
    df.insert(6, 'abbr', df['state'].map(mapping))
    return df


def q05_replace_missing_values(path1, path2):
    """Return the mapped sales data with two abbreviations filled in by hand.

    Args:
        path1: Excel workbook, passed on to ``q04_mapping``.
        path2: Scraped CSV, passed on to ``q04_mapping``.

    Returns:
        pandas.DataFrame: the frame from ``q04_mapping`` with ``abbr`` set to
        ``'MS'`` in row 6 and ``'TN'`` in row 10.
    """
    df = q04_mapping(path1, path2)

    # NOTE(review): row labels 6 and 10 are hard-coded — presumably the rows
    # whose state name failed to match the lookup; confirm against the
    # workbook, since reordering its rows would patch the wrong records.
    manual_fixes = ((6, 'MS'), (10, 'TN'))
    for row_label, abbreviation in manual_fixes:
        df.at[row_label, 'abbr'] = abbreviation
    return df
def q04_mapping(path1, path2, name_col='United States of America',
                abbr_col='US'):
    """Add an ``abbr`` column of state abbreviations to the sales data.

    Args:
        path1: Excel workbook with a ``state`` column and at least six
            columns in total.
        path2: CSV holding full state names and their abbreviations.
        name_col: CSV column with the full state names.
        abbr_col: CSV column with the abbreviations.

    Returns:
        pandas.DataFrame: the workbook contents with ``abbr`` inserted at
        column position 6; states without a match get NaN.
    """
    scraped = pd.read_csv(path2)
    mapping = dict(zip(scraped[name_col], scraped[abbr_col]))

    df = pd.read_excel(path1)
    # Series.map yields NaN for any state missing from the lookup.
    df.insert(6, 'abbr', df['state'].map(mapping))
    return df


def q05_replace_missing_values(path1, path2):
    """Return the mapped sales data with two abbreviations filled in by hand.

    Sets ``abbr`` to ``'MS'`` in row 6 and ``'TN'`` in row 10 of the frame
    returned by ``q04_mapping(path1, path2)``.
    """
    df = q04_mapping(path1, path2)
    # NOTE(review): hard-coded row labels — presumably the rows whose state
    # name did not match the lookup; confirm against the workbook.
    for row_label, abbreviation in ((6, 'MS'), (10, 'TN')):
        df.at[row_label, 'abbr'] = abbreviation
    return df


def q06_sub_total(path1, path2):
    """Sum ``account``, ``Jan``, ``Feb`` and ``Mar`` per state abbreviation.

    Args:
        path1: Excel workbook, passed on to ``q05_replace_missing_values``.
        path2: Scraped CSV, passed on to ``q05_replace_missing_values``.

    Returns:
        pandas.DataFrame: one row per ``abbr`` value (the index) with the
        summed ``account``, ``Jan``, ``Feb`` and ``Mar`` columns. Rows whose
        ``abbr`` is missing are left out by the grouping.
    """
    df = q05_replace_missing_values(path1, path2)

    # Pick the numeric columns *before* aggregating: summing every column
    # would also "add up" the text columns (string concatenation on
    # pandas >= 2.0) only for the result to be thrown away.
    summed_columns = ['account', 'Jan', 'Feb', 'Mar']
    return df.groupby('abbr')[summed_columns].sum()
def q04_mapping(path1, path2, name_col='United States of America',
                abbr_col='US'):
    """Add an ``abbr`` column of state abbreviations to the sales data.

    Args:
        path1: Excel workbook with a ``state`` column and at least six
            columns in total.
        path2: CSV holding full state names and their abbreviations.
        name_col: CSV column with the full state names.
        abbr_col: CSV column with the abbreviations.

    Returns:
        pandas.DataFrame: the workbook contents with ``abbr`` inserted at
        column position 6; states without a match get NaN.
    """
    scraped = pd.read_csv(path2)
    mapping = dict(zip(scraped[name_col], scraped[abbr_col]))

    df = pd.read_excel(path1)
    # Series.map yields NaN for any state missing from the lookup.
    df.insert(6, 'abbr', df['state'].map(mapping))
    return df


def q05_replace_missing_values(path1, path2):
    """Return the mapped sales data with two abbreviations filled in by hand.

    Sets ``abbr`` to ``'MS'`` in row 6 and ``'TN'`` in row 10 of the frame
    returned by ``q04_mapping(path1, path2)``.
    """
    df = q04_mapping(path1, path2)
    # NOTE(review): hard-coded row labels — presumably the rows whose state
    # name did not match the lookup; confirm against the workbook.
    for row_label, abbreviation in ((6, 'MS'), (10, 'TN')):
        df.at[row_label, 'abbr'] = abbreviation
    return df


def q06_sub_total(path1, path2):
    """Sum ``account``, ``Jan``, ``Feb`` and ``Mar`` per state abbreviation.

    Returns a frame indexed by ``abbr`` with those four summed columns.
    """
    df = q05_replace_missing_values(path1, path2)
    # Select the numeric columns before aggregating so the text columns are
    # never summed (string concatenation on pandas >= 2.0).
    return df.groupby('abbr')[['account', 'Jan', 'Feb', 'Mar']].sum()


def _as_dollars(value):
    """Format one number as ``$`` + thousands-separated text (min. width 2)."""
    return '$' + '{:2,}'.format(value)


def q07_symbols(path1, path2):
    """Return the per-state subtotals as dollar-formatted strings.

    A ``total`` column (``Jan`` + ``Feb`` + ``Mar``) is added to the result
    of ``q06_sub_total`` and then every cell is turned into text such as
    ``'$18,000'``. Values below ten are padded to width two, e.g. ``'$ 4'``.

    Args:
        path1: Excel workbook, passed on to ``q06_sub_total``.
        path2: Scraped CSV, passed on to ``q06_sub_total``.

    Returns:
        pandas.DataFrame: indexed by ``abbr`` with string columns
        ``account``, ``Jan``, ``Feb``, ``Mar`` and ``total``.
        NOTE(review): ``account`` is formatted as a dollar amount too —
        confirm that is wanted.
    """
    df = q06_sub_total(path1, path2)
    df['total'] = df['Jan'] + df['Feb'] + df['Mar']

    # DataFrame.applymap is deprecated since pandas 2.1; mapping each column
    # with Series.map is element-wise as well and works on every version.
    return df.apply(lambda column: column.map(_as_dollars))
def q04_mapping(path1, path2, name_col='United States of America',
                abbr_col='US'):
    """Add an ``abbr`` column of state abbreviations to the sales data.

    Args:
        path1: Excel workbook with a ``state`` column and at least six
            columns in total.
        path2: CSV holding full state names and their abbreviations.
        name_col: CSV column with the full state names.
        abbr_col: CSV column with the abbreviations.

    Returns:
        pandas.DataFrame: the workbook contents with ``abbr`` inserted at
        column position 6; states without a match get NaN.
    """
    scraped = pd.read_csv(path2)
    mapping = dict(zip(scraped[name_col], scraped[abbr_col]))

    df = pd.read_excel(path1)
    # Series.map yields NaN for any state missing from the lookup.
    df.insert(6, 'abbr', df['state'].map(mapping))
    return df


def q05_replace_missing_values(path1, path2):
    """Return the mapped sales data with two abbreviations filled in by hand.

    Sets ``abbr`` to ``'MS'`` in row 6 and ``'TN'`` in row 10 of the frame
    returned by ``q04_mapping(path1, path2)``.
    """
    df = q04_mapping(path1, path2)
    # NOTE(review): hard-coded row labels — presumably the rows whose state
    # name did not match the lookup; confirm against the workbook.
    for row_label, abbreviation in ((6, 'MS'), (10, 'TN')):
        df.at[row_label, 'abbr'] = abbreviation
    return df


def q06_sub_total(path1, path2):
    """Sum ``account``, ``Jan``, ``Feb`` and ``Mar`` per state abbreviation.

    Returns a frame indexed by ``abbr`` with those four summed columns.
    """
    df = q05_replace_missing_values(path1, path2)
    # Select the numeric columns before aggregating so the text columns are
    # never summed (string concatenation on pandas >= 2.0).
    return df.groupby('abbr')[['account', 'Jan', 'Feb', 'Mar']].sum()


def _as_dollars(value):
    """Format one number as ``$`` + thousands-separated text (min. width 2)."""
    return '$' + '{:2,}'.format(value)


def q08_append_subtotals(path1, path2):
    """Return the per-state subtotals plus a grand-total row, as dollar text.

    The frame from ``q06_sub_total`` gets one extra row labelled ``'total'``
    that holds the sum of every column; afterwards each cell is converted to
    text such as ``'$66,000'``.

    Args:
        path1: Excel workbook, passed on to ``q06_sub_total``.
        path2: Scraped CSV, passed on to ``q06_sub_total``.

    Returns:
        pandas.DataFrame: one row per ``abbr`` followed by the ``'total'``
        row, with string columns ``account``, ``Jan``, ``Feb`` and ``Mar``.
    """
    df = q06_sub_total(path1, path2)

    grand_total = pd.DataFrame(
        {column: [df[column].sum()] for column in df.columns},
        index=['total'],
    )

    # DataFrame.append was removed in pandas 2.0; pd.concat stacks the two
    # frames the same way and keeps both sets of row labels.
    df = pd.concat([df, grand_total])

    # Per-column Series.map instead of the deprecated DataFrame.applymap.
    return df.apply(lambda column: column.map(_as_dollars))