Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ Doing this project will enable you to integrate Multiple data sources to answer
You will also learn to perform common Excel tasks with pandas.


## What will you learn in the session ?
## What will you learn in the session?
- Python Basics
- Pandas
- Web Scraping
Expand All @@ -20,4 +20,4 @@ You also learn to perform common excel tasks with pandas

## Pre-requisites
- Working knowledge of Pandas, Numpy, Matplotlib
- Data indexing and slicing
- Data indexing along with slicing
Binary file modified __pycache__/__init__.cpython-36.pyc
Binary file not shown.
2 changes: 1 addition & 1 deletion q01_load_data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@
"metadata": {},
"nbformat": 4,
"nbformat_minor": 2
}
}
Binary file modified q01_load_data/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q01_load_data/__pycache__/build.cpython-36.pyc
Binary file not shown.
10 changes: 8 additions & 2 deletions q01_load_data/build.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# %load q01_load_data/build.py
import pandas as pd

path = 'data/excel-comp-data.xlsx'

def q01_load_data(path):
    """Load an Excel sheet and add the derived columns used downstream.

    Args:
        path: Location of the Excel file; passed unchanged to
            ``pd.read_excel``.

    Returns:
        The loaded ``DataFrame`` with its ``state`` column lower-cased and a
        new ``total`` column equal to ``Jan + Feb + Mar`` for each row.

    Raises:
        KeyError: If the sheet has no ``state``, ``Jan``, ``Feb`` or ``Mar``
            column.
    """
    df = pd.read_excel(path)
    # Lower-case the state names in place, overwriting the original column.
    df['state'] = df['state'].str.lower()
    # Element-wise addition: a missing value in any month makes that row's
    # total missing as well (unlike DataFrame.sum, which skips NaN).
    df['total'] = df['Jan'] + df['Feb'] + df['Mar']
    return df
# Module-level call: runs the loader on the module's `path` constant each time
# this file is executed or imported; the returned frame is not kept.
q01_load_data(path)

Binary file modified q01_load_data/tests/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q01_load_data/tests/__pycache__/tests.cpython-36.pyc
Binary file not shown.
14 changes: 9 additions & 5 deletions q02_append_row/build.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
import pandas as pd
import sys, os
#sys.path.append(os.path.join(os.path.dirname(os.curdir)))
from greyatomlib.pandas_guided_project.q01_load_data.build import q01_load_data


def q02_append_row(path):
    """Load the data via ``q01_load_data`` and append a row of column totals.

    Args:
        path: Location of the input file; forwarded unchanged to
            ``q01_load_data``.

    Returns:
        A new ``DataFrame`` holding every loaded row followed by one extra
        row with the sums of the ``Jan``, ``Feb``, ``Mar`` and ``total``
        columns. All other columns are missing (NaN) in that extra row, and
        the index is renumbered from 0.
    """
    df = q01_load_data(path)
    summed_columns = ['Jan', 'Feb', 'Mar', 'total']
    # sum(axis=0) gives a Series indexed by column name; to_frame().T turns it
    # into a single-row frame whose columns are those names.
    totals_row = df[summed_columns].sum(axis=0).to_frame().T
    # pd.concat is used because DataFrame.append was removed in pandas 2.0.
    # The result is returned; the earlier code built it but returned None.
    return pd.concat([df, totals_row], ignore_index=True)


23 changes: 22 additions & 1 deletion q03_scrape_clean/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,26 @@


def q03_scrape_clean(url):
"write your solution here"
page = requests.get(url)

df = pd.read_html(page.text)
df1 = df[0].iloc[11:,0]
df2 = df[0].iloc[11:,1]
df3 = df[0].iloc[11:,2]
df4 = df[0].iloc[11:,3]
df5 = df[0].iloc[11:,4]
df6 = df[0].iloc[11:,5]
df7 = df[0].iloc[11:,6]
df8 = df[0].iloc[11:,7]
df9 = df[0].iloc[11:,8]
df10 = df[0].iloc[11:,9]

ans = pd.concat([df1,df2,df3,df4,df5,df6,df7,df8,df9,df10], axis=1)
ans.rename(mapper={0:'Name', 1:'Status', 2:'ISO', 3:'ANSI0', 4:'ANSI1', 5:'USPS', 6:'USCG', 7:'GPO', 8:'AP', 9:'Other Abbrevations'}, inplace=True, axis=1)
ans.drop(ans.index[0], axis=0, inplace=True)
ans['ex1'] = 0
ans['ex2'] = 0
ans['ex3'] = 0
ans['ex4'] = 0
ans['ex5'] = 0