-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathHelper.rb
More file actions
155 lines (134 loc) · 4.02 KB
/
Helper.rb
File metadata and controls
155 lines (134 loc) · 4.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# Auxiliary functions for the script
require 'csv'
# Collects GitHub user profiles reachable from code-search results and
# appends the ones that expose an e-mail address to a CSV file.
#
# State kept per instance:
# * @csv_file - path of the CSV file rows are appended to
# * @users    - login => profile hash for every user already fetched
# * @repos    - "owner/name" => true for every repository already visited
#
# NOTE(review): `HTTParty` and `ap` are not required in this file; they are
# presumably loaded by the calling script - confirm.
class Helper
  # Titles written as the first row of the CSV file.
  CSV_COLUMNS = %w[
    username email name location blog company
    public_repos followers following created_at
  ].freeze

  # Keys copied from the users API response into the stored profile,
  # in the same order as the CSV columns that follow 'username'.
  USER_FIELDS = %w[
    email name location blog company
    public_repos followers following created_at
  ].freeze

  # @param data_source [String] path of the CSV file to append to
  def initialize(data_source)
    @csv_file = data_source
    @users = {}
    @repos = {}
    save_user_csv_header
  end

  # Parses a `link` response header into a { rel => url } hash.
  #
  # @param header [#[]] response headers, read through the 'link' key
  # @return [Hash{String => String}] empty when there is no link header;
  #   entries that do not look like `<url>; rel="name"` are ignored
  def pagination(header)
    link_header = header.nil? ? '' : header['link'].to_s
    return {} if link_header.empty?

    link_header.split(',').each_with_object({}) do |link, links|
      # Non-greedy character classes so extra parameters after rel="..."
      # do not leak into the captured relation name.
      parts = link.strip.match(/<([^>]+)>\s*;\s*rel="([^"]+)"/)
      links[parts[2]] = parts[1] if parts
    end
  end

  # Sleeps until the rate-limit window resets when fewer than two requests
  # remain according to the response headers.
  #
  # @param header [#[]] response headers; reads 'x-ratelimit-remaining'
  #   and 'x-ratelimit-reset' (epoch seconds)
  # @return [void]
  def verify_rate_limit(header)
    return if header.nil?

    remaining = header['x-ratelimit-remaining']
    # A response without rate-limit headers must not be read as "0 left".
    return if remaining.nil? || remaining.to_i >= 2

    wait = Time.at(header['x-ratelimit-reset'].to_i) - Time.now
    # The reset moment may already be in the past; nothing to wait for then.
    return unless wait > 0

    puts "[log] Low rate limit, wait #{wait} seconds to reset"
    sleep(wait)
  end

  # Walks the 'items' of a search response and, for every repository not
  # seen before, stores its owner, collaborators and contributors.
  #
  # @param response [#[], nil] search response expected to hold an 'items' list
  # @param token [String] passed through to every API request
  # @return [void]
  def save_users(response, token)
    unless response && response.respond_to?(:key?) && response.key?('items')
      puts 'An error occurred. Printing response.'
      ap response
      return
    end

    response['items'].each do |item|
      username = item.dig('repository', 'owner', 'login')
      reponame = item.dig('repository', 'name')
      if username.nil? || reponame.nil?
        puts 'Item without repository owner or name, skipping.'
        next
      end

      repo_key = "#{username}/#{reponame}"
      puts "Visiting #{repo_key}"
      if @repos.key?(repo_key)
        puts 'Repository already visited, skipping.'
        next
      end

      save_user(username, token)
      save_collaborators(username, reponame, token)
      save_contributors(username, reponame, token)
      @repos[repo_key] = true
    end
  end

  # Writes the column titles, but only when the CSV file is missing or
  # empty, so repeated runs against one file do not repeat the header row.
  #
  # @return [void]
  def save_user_csv_header
    return if File.size?(@csv_file)

    append_csv_row(CSV_COLUMNS)
  rescue StandardError => e
    puts e.inspect
  end

  # Appends one user profile to the CSV file.
  #
  # @param user [Hash] profile whose values are written in insertion order
  # @return [void]
  def save_user_csv(user)
    append_csv_row(user.values)
  end

  # Fetches a user's profile once, remembers it in @users and writes it to
  # the CSV file when it carries a non-blank e-mail address.
  #
  # @param login [String] GitHub login
  # @param token [String] passed through to the API request
  # @return [void]
  def save_user(login, token)
    if @users.key?(login)
      puts "User #{login} already in database."
      return
    end

    response = get_response("https://api.github.com/users/#{login}", token)
    if response.nil?
      # get_response already logged the failure; leave the user unrecorded
      # so a later occurrence can retry.
      puts "No response for user #{login}, skipping."
      return
    end
    ap response

    user = { 'login' => login }
    USER_FIELDS.each { |field| user[field] = response[field] }
    # add to a hash
    @users[login] = user

    # save to csv if email is not nil
    if user['email'].to_s.strip.empty?
      puts "User #{login} has a null or blank email address. Adding to hash and skipping csv."
    else
      save_user_csv(user)
      puts "User #{login} saved."
    end
  end

  # Stores every contributor of the given repository.
  #
  # @param owner [String] repository owner login
  # @param repos [String] repository name
  # @param token [String] passed through to the API requests
  # @return [void]
  def save_contributors(owner, repos, token)
    save_repo_members(owner, repos, 'contributors', token)
  end

  # Stores every collaborator of the given repository.
  #
  # @param owner [String] repository owner login
  # @param repos [String] repository name
  # @param token [String] passed through to the API requests
  # @return [void]
  def save_collaborators(owner, repos, token)
    save_repo_members(owner, repos, 'collaborators', token)
  end

  # Performs a GET request and applies rate-limit back-off afterwards.
  #
  # @param url [String] request URL
  # @param token [String, nil] appended verbatim as a query string when
  #   present (presumably of the form "access_token=..." - confirm with caller)
  # @return [Object, nil] the HTTParty response, or nil when the request failed
  def get_response(url, token)
    headers = { 'Accept' => 'application/vnd.github.preview.text-match+json',
                'User-Agent' => 'coopera-codesearch' }
    unless token.nil? || token.to_s.empty?
      separator = url.include?('?') ? '&' : '?'
      url = "#{url}#{separator}#{token}"
    end

    response = nil
    begin
      # Headers must go under the :headers option; passed as the bare second
      # argument they are taken for request options and never sent.
      response = HTTParty.get(url, headers: headers)
      verify_rate_limit(response.headers)
    rescue StandardError => e
      puts e.inspect
    end
    response
  end

  private

  # Appends a single row to the CSV file, logging instead of raising on failure.
  def append_csv_row(row)
    CSV.open(@csv_file, 'ab') { |writer| writer << row }
  rescue StandardError => e
    puts e.inspect
  end

  # Fetches /repos/<owner>/<repos>/<relation> and stores each listed login.
  def save_repo_members(owner, repos, relation, token)
    url = "https://api.github.com/repos/#{owner}/#{repos}/#{relation}"
    response = get_response(url, token)
    unless response.respond_to?(:each)
      puts "No #{relation} returned for #{owner}/#{repos}, skipping."
      return
    end

    response.each do |item|
      # An error payload iterates as [key, value] pairs; skip those entries.
      login = item['login'] if item.respond_to?(:key?)
      next if login.nil?

      begin
        save_user(login, token)
      rescue StandardError => e
        puts e.inspect
      end
    end
  end
end