-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata_prcocessing_oop_sol.py
More file actions
134 lines (111 loc) · 4.26 KB
/
data_prcocessing_oop_sol.py
File metadata and controls
134 lines (111 loc) · 4.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import copy
import csv
import os
__location__ = os.path.realpath(
os.path.join(os.getcwd(), os.path.dirname(__file__)))
cities = []
with open(os.path.join(__location__, 'Cities.csv')) as f:
rows = csv.DictReader(f)
for r in rows:
cities.append(dict(r))
countries = []
with open(os.path.join(__location__, 'Countries.csv')) as f:
rows = csv.DictReader(f)
for r in rows:
countries.append(dict(r))
class DB:
def __init__(self):
self.database = []
def insert(self, table):
self.database.append(table)
def search(self, table_name):
for table in self.database:
if table.table_name == table_name:
return table
return None
class Table:
def __init__(self, table_name, table):
self.table_name = table_name
self.table = table
def join(self, other_table, common_key):
joined_table = Table(self.table_name + '_joins_' +
other_table.table_name, [])
for item1 in self.table:
for item2 in other_table.table:
if item1[common_key] == item2[common_key]:
dict1 = copy.deepcopy(item1)
dict2 = copy.deepcopy(item2)
dict1.update(dict2)
joined_table.table.append(dict1)
return joined_table
def filter(self, condition):
filtered_table = Table(self.table_name + '_filtered', [])
for item1 in self.table:
if condition(item1):
filtered_table.table.append(item1)
return filtered_table
def aggregate(self, function, aggregation_key):
temps = []
for item1 in self.table:
temps.append(float(item1[aggregation_key]))
return function(temps)
def select(self, attributes_list):
temps = []
for item1 in self.table:
dict_temp = {}
for key in item1:
if key in attributes_list:
dict_temp[key] = item1[key]
temps.append(dict_temp)
return temps
def __str__(self):
return self.table_name + ':' + str(self.table)
table1 = Table('cities', cities)
table2 = Table('countries', countries)
my_DB = DB()
my_DB.insert(table1)
my_DB.insert(table2)
my_table1 = my_DB.search('cities')
print("Test filter: only filtering out cities in Italy")
my_table1_filtered = my_table1.filter(lambda x: x['country'] == 'Italy')
print(my_table1_filtered)
print()
print("Test select: only displaying two fields, city and latitude, for cities in Italy")
my_table1_selected = my_table1_filtered.select(['city', 'latitude'])
print(my_table1_selected)
print()
print("Calculting the average temperature without using aggregate for cities in Italy")
temps = []
for item in my_table1_filtered.table:
temps.append(float(item['temperature']))
print(sum(temps)/len(temps))
print()
print("Calculting the average temperature using aggregate for cities in Italy")
print(my_table1_filtered.aggregate(lambda x: sum(x)/len(x), 'temperature'))
print()
print("Test join: finding cities in non-EU countries whose temperatures are below 5.0")
my_table2 = my_DB.search('countries')
my_table3 = my_table1.join(my_table2, 'country')
my_table3_filtered = my_table3.filter(lambda x: x['EU'] == 'no').filter(
lambda x: float(x['temperature']) < 5.0)
print(my_table3_filtered.table)
print()
print("Selecting just three fields, city, country, and temperature")
print(my_table3_filtered.select(['city', 'country', 'temperature']))
print()
print("Print the min and max temperatures for cities in EU that do not have coastlines")
my_table3_filtered = my_table3.filter(
lambda x: x['EU'] == 'yes').filter(lambda x: x['coastline'] == 'no')
print("Min temp:", my_table3_filtered.aggregate(
lambda x: min(x), 'temperature'))
print("Max temp:", my_table3_filtered.aggregate(
lambda x: max(x), 'temperature'))
print()
print("Print the min and max latitude for cities in every country")
for item in my_table2.table:
my_table1_filtered = my_table1.filter(
lambda x: x['country'] == item['country'])
if len(my_table1_filtered.table) >= 1:
print(item['country'], my_table1_filtered.aggregate(lambda x: min(
x), 'latitude'), my_table1_filtered.aggregate(lambda x: max(x), 'latitude'))
print()