Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .ipynb_checkpoints/given-checkpoint.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 2
}
224 changes: 224 additions & 0 deletions .ipynb_checkpoints/jdunca51-checkpoint.ipynb

Large diffs are not rendered by default.

203 changes: 203 additions & 0 deletions .ipynb_checkpoints/rhossai2-checkpoint.ipynb

Large diffs are not rendered by default.

Binary file added compare.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion compareRels.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
headers = {'Accept': 'application/vnd.github.hellcat-preview+json'}

db = client['fdac18mp2'] # added in class
collName = 'releases_audris'
collName = 'releases_rhossai2'
coll = db [collName]
def wait (left):
while (left < 20):
Expand Down
2 changes: 1 addition & 1 deletion extrNpm.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pymongo, json, sys
client = pymongo.MongoClient (host="da1")
db = client ['fdac18mp2']
id = "audris"
id = "rhossai2"
coll = db [ 'npm_' + id]
for r in coll.find():
if 'collected' in r:
Expand Down
2 changes: 1 addition & 1 deletion extrRels.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pymongo, json, sys
client = pymongo.MongoClient (host="da1")
db = client ['fdac18mp2']
id = "audris"
id = "rhossai2"
coll = db [ 'releases_' + id]
for r in coll.find():
n = r['name']
Expand Down
147 changes: 147 additions & 0 deletions given.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:60: DeprecationWarning: insert is deprecated. Use insert_one or insert_many instead.\n",
"/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:80: DeprecationWarning: insert is deprecated. Use insert_one or insert_many instead.\n"
]
}
],
"source": [
"import sys\n",
"import re\n",
"import pymongo\n",
"import json\n",
"import time\n",
"import datetime\n",
"import requests\n",
"\n",
"# please use this database\n",
"dbname = \"fdac18mp2\"\n",
"# please modify so you store data in your collection\n",
"collname = \"glprj_rhossai2\"\n",
"\n",
"client = pymongo.MongoClient(host='da1')\n",
"db = client[dbname]\n",
"coll = db[collname]\n",
"\n",
"# beginning page index\n",
"begin = \"0\"\n",
"\n",
"# URL of the first listing page; `begin` is spliced in as the page number\n",
"beginurl = (\n",
"    \"https://gitlab.com/api/v4/projects?archived=false&membership=false&order_by=created_at&owned=false&page=\"\n",
"    + begin\n",
"    + \"&per_page=99&simple=false&sort=desc&starred=false&statistics=false&with_custom_attributes=false&with_issues_enabled=false&with_merge_requests_enabled=false\"\n",
")\n",
"\n",
"# last known number of remaining API requests; updated by get()\n",
"gleft = 0\n",
"\n",
"# passed as the HTTP request headers of every API call below\n",
"header = {'per_page': '99'}\n",
"\n",
"# check remaining query chances for rate-limit restriction\n",
"def wait(left):\n",
"    \"\"\"Block until at least 20 API requests remain, then return the count.\n",
"\n",
"    Args:\n",
"        left: last known number of remaining requests.\n",
"\n",
"    Returns:\n",
"        The remaining-request count; refreshed from the\n",
"        ``RateLimit-Remaining`` response header if any polling was needed.\n",
"    \"\"\"\n",
"    while left < 20:\n",
"        resp = requests.get('https://gitlab.com/api/v4/projects', headers=header)\n",
"        if resp.ok:\n",
"            left = int(resp.headers.get('RateLimit-Remaining'))\n",
"        # Sleep only while the quota is still low: a recovered quota returns\n",
"        # immediately, and a failed poll is not retried in a tight loop.\n",
"        if left < 20:\n",
"            time.sleep(60)\n",
"    return left\n",
"\n",
"# send queries and extract urls \n",
"def get(url, coll):\n",
"    \"\"\"Crawl the GitLab project listing starting at *url* into *coll*.\n",
"\n",
"    Each project object of a page is stored as one document, then the\n",
"    rel=\"next\" link of the response is followed until there is none.\n",
"\n",
"    Args:\n",
"        url: URL of the first listing page to request.\n",
"        coll: pymongo collection receiving one document per project.\n",
"\n",
"    Returns:\n",
"        ``(\"got blocked\", str(begin))`` when the API answers 403;\n",
"        otherwise ``None`` (crawl finished, or stopped after an error was\n",
"        written to stderr).\n",
"    \"\"\"\n",
"    global gleft\n",
"    gleft = wait(gleft)\n",
"\n",
"    try:\n",
"        while url:\n",
"            r = requests.get(url, headers=header)\n",
"            time.sleep(0.5)\n",
"            # got blocked\n",
"            if r.status_code == 403:\n",
"                # report the configured start page index\n",
"                return \"got blocked\", str(begin)\n",
"            if not r.ok:\n",
"                sys.stderr.write(\"url can not found:\\n\" + url + '\\n')\n",
"                return\n",
"\n",
"            gleft = int(r.headers.get('RateLimit-Remaining'))\n",
"            for el in json.loads(r.text):\n",
"                # insert() is deprecated in pymongo; insert_one() replaces it\n",
"                coll.insert_one(el)\n",
"\n",
"            # pause here if the quota is nearly used up before the next page\n",
"            gleft = wait(gleft)\n",
"            # requests parses the Link header; no \"next\" entry ends the loop\n",
"            url = r.links.get('next', {}).get('url')\n",
"\n",
"    except requests.exceptions.ConnectionError:\n",
"        # stop here rather than retrying the same URL forever without delay\n",
"        sys.stderr.write('could not get ' + url + '\\n')\n",
"    except Exception as e:\n",
"        sys.stderr.write(url + ';' + str(e) + '\\n')\n",
" \n",
"#start retrieving \n",
"get(beginurl,coll)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading