-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathPandas.py
More file actions
280 lines (220 loc) · 5 KB
/
Pandas.py
File metadata and controls
280 lines (220 loc) · 5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
########## PANDAS ##########
import pandas as pd
### SERIES
# Walk-through of the pandas Series basics: creation, naming, attributes,
# indexing, slicing, boolean selection, vectorised operations and in-place
# modification. The expected console output follows each print() call.

## create a Series
# from a list (pandas assigns a default RangeIndex 0..n-1)
g7_pop = pd.Series([35.467, 63.951, 80.940, 60.665, 127.061, 64.511, 318.523])
print(g7_pop)
'''
0     35.467
1     63.951
2     80.940
3     60.665
4    127.061
5     64.511
6    318.523
dtype: float64
'''

# from a dictionary (the keys become the index labels)
# Stored under its own name so that g7_pop keeps its default RangeIndex for
# the naming / attribute / index examples that follow.
g7_pop_labeled = pd.Series(
    {
        'Canada': 35.467,
        'France': 63.951,
        'Germany': 80.94,
        'Italy': 60.665,
        'Japan': 127.061,
        'United Kingdom': 64.511,
        'United States': 318.523,
    },
    name='G7 Population in millions',
)
print(g7_pop_labeled)
'''
Canada             35.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population in millions, dtype: float64
'''

# from another Series (labels missing from the source are filled with NaN)
new_g4_pop = pd.Series(
    g7_pop_labeled,
    index=['France', 'Germany', 'Italy', 'Spain'],
)
print(new_g4_pop)
'''
France     63.951
Germany    80.940
Italy      60.665
Spain         NaN
Name: G7 Population in millions, dtype: float64
'''

## giving a name to a Series
g7_pop.name = 'G7 Population in millions'
print(g7_pop)
'''
0     35.467
1     63.951
2     80.940
3     60.665
4    127.061
5     64.511
6    318.523
Name: G7 Population in millions, dtype: float64
'''

## print attributes from a Series
print(g7_pop.dtype)
# float64
print(g7_pop.mean())  # mean
# 107.30257142857144
print(g7_pop.std())  # standard deviation
# 97.24996987121581
print(g7_pop.values)
# [ 35.467  63.951  80.94   60.665 127.061  64.511 318.523]
print(type(g7_pop.values))
# <class 'numpy.ndarray'>

## index
# Series are iterable and can be indexed much like lists
# ==> a single label returns the scalar stored there;
#     a list of labels returns a Series
#     (on a DataFrame: one column label -> Series, a list of them -> DataFrame)
# IMPORTANT: with label-based slicing the upper bound is INCLUDED
#            (positional .iloc slices still exclude it)
print(g7_pop[0])  # 0 is a *label* of the default RangeIndex here
# 35.467
print(g7_pop.index)
# RangeIndex(start=0, stop=7, step=1)

# in contrast to lists, we can explicitly define the index:
g7_pop.index = [
    'Canada',
    'France',
    'Germany',
    'Italy',
    'Japan',
    'United Kingdom',
    'United States',
]
print(g7_pop)
'''
Canada             35.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population in millions, dtype: float64
'''
print(g7_pop['Canada'])
# 35.467
print(g7_pop[['Italy', 'France']])
'''
Italy     60.665
France    63.951
Name: G7 Population in millions, dtype: float64
'''

# Numeric positions can also be used, with the "iloc" attribute.
# Once the index holds strings, always use .iloc for positions: an integer
# key in plain [] on a non-integer index is a deprecated positional fallback.
print(g7_pop.iloc[0])
# 35.467
print(g7_pop.iloc[-1])
# 318.523
print(g7_pop.iloc[[0, 1]])
'''
Canada    35.467
France    63.951
Name: G7 Population in millions, dtype: float64
'''

## slicing
# LAST index INCLUDED !!!
print(g7_pop['Canada': 'Italy'])
'''
Canada     35.467
France     63.951
Germany    80.940
Italy      60.665   # LAST index INCLUDED !!!
Name: G7 Population in millions, dtype: float64
'''

### Conditional selection - boolean arrays
print(g7_pop > 70)
'''
Canada            False
France            False
Germany            True
Italy             False
Japan              True
United Kingdom    False
United States      True
Name: G7 Population in millions, dtype: bool
'''
print(g7_pop[g7_pop > 70])
'''
Germany           80.940
Japan            127.061
United States    318.523
Name: G7 Population in millions, dtype: float64
'''
print(g7_pop[g7_pop > g7_pop.mean()])
'''
Japan            127.061
United States    318.523
Name: G7 Population in millions, dtype: float64
'''
# combine conditions with | (or) and & (and); each one needs its parentheses
print(g7_pop[(g7_pop > 80) | (g7_pop < 40)])
'''
Canada            35.467
Germany           80.940
Japan            127.061
United States    318.523
Name: G7 Population in millions, dtype: float64
'''
print(g7_pop[(g7_pop > 80) & (g7_pop < 200)])
'''
Germany     80.940
Japan      127.061
Name: G7 Population in millions, dtype: float64
'''

### Operations and methods
# arithmetic is applied element-wise and returns a new Series
print(g7_pop * 1_000_000)
'''
Canada             35467000.0
France             63951000.0
Germany            80940000.0
Italy              60665000.0
Japan             127061000.0
United Kingdom     64511000.0
United States     318523000.0
Name: G7 Population in millions, dtype: float64
'''
print(g7_pop['France': 'Italy'].mean())
# 68.51866666666666

## modifying series
# Kept last on purpose: these assignments change g7_pop in place, so every
# example that relies on the original figures has to run before them.
g7_pop['Canada'] = 40.5
print(g7_pop)
'''
Canada             40.500   # value replaced
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population in millions, dtype: float64
'''
g7_pop.iloc[-1] = 500
print(g7_pop)
'''
Canada             40.500
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     500.000   # value replaced
Name: G7 Population in millions, dtype: float64
'''
# a boolean mask on the left-hand side overwrites every matching element
g7_pop[g7_pop < 70] = 99.99
print(g7_pop)
'''
Canada             99.990
France             99.990
Germany            80.940
Italy              99.990
Japan             127.061
United Kingdom     99.990
United States     500.000
Name: G7 Population in millions, dtype: float64
'''