
Commit 9aea992

re-arranged data sets for pandas examples and added pyinstrument to useful packages list
1 parent 0c71522 commit 9aea992

File tree: 15 files changed (+187657, -7856 lines)


PyData/UsefulPackages.txt

Lines changed: 16 additions & 4 deletions
@@ -33,6 +33,7 @@ pyflame: https://github.com/uber/pyflame # Does not support Windows
 birdseye: https://github.com/alexmojaki/birdseye
 pyheat: https://github.com/csurfer/pyheat
 heartrate: https://github.com/alexmojaki/heartrate
+pyinstrument: https://github.com/joerick/pyinstrument
 https://www.markkeller.dev/2018-07-14-optimize_python/

 # Forecasting
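
pyinstrument, newly added to the profiling entries above, is a statistical call-stack profiler. A minimal sketch of its in-process API, where slow_work() is just a placeholder for whatever code you want to profile:

from pyinstrument import Profiler

profiler = Profiler()
profiler.start()

slow_work()  # placeholder for the code under investigation

profiler.stop()
print(profiler.output_text(unicode=True, color=True))

It can also be run from the command line as pyinstrument your_script.py.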
@@ -71,8 +72,9 @@ bqplot: https://github.com/bloomberg/bqplot
 pdvega
 bokeh / HoloViews / hvplot / pandas-bokeh (pyviz stack)
 chartify: https://github.com/spotify/chartify
-folium
-geoplot
+folium: https://github.com/python-visualization/folium
+geoplot: https://github.com/ResidentMario/geoplot
+movingpandas: https://github.com/anitagraser/movingpandas
 plotnine: clone of R's ggplot2
 joypy: https://github.com/sbebo/joypy/blob/master/Joyplot.ipynb
 jmpy
@@ -86,6 +88,14 @@ Rain clouds viz: https://github.com/pog87/PtitPrince
 jupyterplot: https://lvwerra.github.io/jupyterplot/
 cyberpunk theme: https://github.com/dhaitz/mplcyberpunk/blob/master/README.md
 upsetplot: https://github.com/jnothman/UpSetPlot
+keplergl (PITA to install onto Windows): https://github.com/keplergl/kepler.gl/issues/557
+
+# geo visualization libraries:
+folium: https://github.com/python-visualization/folium
+geoplot: https://github.com/ResidentMario/geoplot
+ipyleaflet: https://github.com/jupyter-widgets/ipyleaflet/
+movingpandas: https://github.com/anitagraser/movingpandas
+keplergl (PITA to install onto Windows): https://github.com/keplergl/kepler.gl/issues/557

 # Animation related
 celluloid: https://github.com/jwkvam/celluloid
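
As a taste of the geo-visualization entries added in the hunk above, a minimal folium sketch (the coordinates and output file name are arbitrary example values):

import folium

# Interactive Leaflet map centred on an example point, saved as standalone HTML
m = folium.Map(location=[48.8566, 2.3522], zoom_start=12)
folium.Marker([48.8584, 2.2945], popup="Example marker").add_to(m)
m.save("map.html")  # open in a browser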
@@ -204,14 +214,16 @@ https://github.com/RobinL/fuzzymatcher
 https://github.com/J535D165/recordlinkage
 scikit-neuralnetwork: https://github.com/aigamedev/scikit-neuralnetwork

-# Webscraping
+# HTTP / Webscraping
 beautifulsoup4
 lxml+XPath
 mechanicalsoup
 selenium (headless-browser framework for extracting javascript data)
 scrapy (OOP framework)
 newspaper3k: https://newspaper.readthedocs.io # easily extract text from articles
-requests-html: requests-html: https://github.com/kennethreitz/requests-html
+requests-html: https://github.com/kennethreitz/requests-html
+HTTPX: https://github.com/encode/httpx
+autoscraper: https://github.com/alirezamika/autoscraper

 # Big Data
 boto3: Interface to S3
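
HTTPX, one of the new entries under the HTTP / Webscraping heading above, mirrors the requests API while also supporting async clients. A minimal synchronous sketch against a placeholder URL:

import httpx

response = httpx.get("https://www.example.org/")
print(response.status_code)
print(response.headers.get("content-type"))
print(response.text[:200])  # first 200 characters of the body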

dask/coiled/Basic_Example.ipynb

Lines changed: 68 additions & 0 deletions
New notebook with three code cells (Python 3.8.5, kernel "coiled_dev", nbformat 4.4):

# Cell 1: imports
from dask.distributed import Client
import coiled
import dask.dataframe as dd

# Cell 2: start a 10-worker Coiled cluster and connect a Dask client to it
cluster = coiled.Cluster(n_workers=10)
client = Client(cluster)
print('Dashboard:', client.dashboard_link)

# Cell 3: lazily read the 2019 NYC yellow-taxi CSVs from public S3, keep them
# in cluster memory, and compute the mean tip amount per passenger count
df = dd.read_csv(
    "s3://nyc-tlc/trip data/yellow_tripdata_2019-*.csv",
    dtype={
        "payment_type": "UInt8",
        "VendorID": "UInt8",
        "passenger_count": "UInt8",
        "RatecodeID": "UInt8",
    },
    storage_options={"anon": True},
    blocksize="16 MiB",
).persist()

df.groupby("passenger_count").tip_amount.mean().compute()
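
The notebook does not shut the cluster down when it is done; a minimal cleanup sketch using the standard dask.distributed / coiled close calls on the client and cluster objects from the cells above (not part of the committed notebook):

# Release the cloud workers once the computation is finished
client.close()
cluster.close()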

hvplot/data/Generate_CPU_Data.py

Lines changed: 31 additions & 0 deletions
# pip install psutil
import csv
import psutil
import time

x_value = 0

fieldnames = ["x_value", "cpu_perc"]


# Create data.csv and write the header row once
with open('data.csv', 'w') as csv_file:
    csv_writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    csv_writer.writeheader()

while True:

    # Re-open in append mode each iteration so the file stays readable while it grows
    with open('data.csv', 'a') as csv_file:
        csv_writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        y_value = psutil.cpu_percent()

        info = {
            "x_value": x_value,
            "cpu_perc": y_value
        }

        csv_writer.writerow(info)
        print(x_value, y_value)

        x_value += 1

    time.sleep(1)
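
Given the file lives under hvplot/data/, the generated data.csv is presumably consumed by an hvplot example elsewhere in the repo. A minimal sketch of reading it back and plotting it; hvplot being installed and used this way is an assumption, only the column names come from the script above:

import pandas as pd
import hvplot.pandas  # noqa: registers the .hvplot accessor on DataFrames

df = pd.read_csv("data.csv")
plot = df.hvplot.line(x="x_value", y="cpu_perc", title="CPU usage over time (%)")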
