GitRecap/app.py at main · aprameyak/GitRecap · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
import os
from dotenv import load_dotenv
import requests
from flask import Flask, jsonify, request
from flask_cors import CORS
from datetime import datetime, timedelta
from collections import defaultdict
from textblob import TextBlob
import time
from functools import wraps
from flask_limiter import Limiter
from flask_limiter.util import get_remote_address
import re

# Load settings from a local .env file (when one exists) before anything below
# reads the environment.
load_dotenv()

# No server-wide GitHub access token is configured (removed for security
# reasons). `token` stays None, so get_headers() only adds an Authorization
# header when the caller passes a per-request token.
token = None

def get_headers(user_token=None):
    """Build the HTTP headers used for GitHub REST API calls.

    Args:
        user_token: Optional GitHub token supplied for this request. When it
            is empty/None the module-level ``token`` is used instead.

    Returns:
        dict with the GitHub v3 ``Accept`` header, plus an ``Authorization``
        header when a token is available from either source.
    """
    # Per-request token wins; the module-level value is only a fallback.
    effective_token = user_token if user_token else token

    request_headers = {'Accept': 'application/vnd.github.v3+json'}
    if effective_token:
        request_headers['Authorization'] = f'token {effective_token}'
    return request_headers

# Flask application object; the route decorators in this module register
# their handlers on it.
app = Flask(__name__)
# Enable CORS for every route ("/*") and allow requests from any origin ("*").
CORS(app, resources={r"/*": {"origins": "*"}})

@app.route('/')
def home():
    """Health-check endpoint: report that the API process is up."""
    return jsonify(status="ok", message="GitRecap API is running")

# Rate limiter for the app. Clients are identified by their remote address,
# and `default_limits` sets the application-wide default quotas; individual
# routes declare their own quota with @limiter.limit(...).
limiter = Limiter(
    app=app,
    key_func=get_remote_address,
    default_limits=["200 per day", "50 per hour"]
)

@app.route('/trending', methods=['GET'])
@limiter.limit("100 per hour")
def get_trending_users():
    """Return a curated list of well-known GitHub accounts for discovery.

    The list is static (nothing is fetched from GitHub); follower and repo
    figures are the fixed values written below.

    Returns:
        JSON object with ``trending_users`` (list of profile summaries) and
        ``total`` (number of entries).
    """
    # (username, display name, description, followers, repos)
    curated_accounts = (
        ('octocat', 'GitHub Octocat', 'GitHub mascot and demo account', 10000, 8),
        ('torvalds', 'Linus Torvalds', 'Creator of Linux and Git', 150000, 2),
        ('antirez', 'Salvatore Sanfilippo', 'Creator of Redis', 12000, 15),
        ('gvanrossum', 'Guido van Rossum', 'Creator of Python', 8000, 5),
        ('jashkenas', 'Jeremy Ashkenas', 'Creator of Backbone.js and CoffeeScript', 20000, 25),
        ('defunkt', 'Chris Wanstrath', 'GitHub co-founder', 25000, 30),
    )

    trending_users = [
        {
            'username': login,
            'name': display_name,
            'description': blurb,
            # Every avatar uses GitHub's "<login>.png" shortcut at 100px.
            'avatar_url': f'https://github.com/{login}.png?size=100',
            'followers': follower_count,
            'repos': repo_count,
        }
        for login, display_name, blurb, follower_count, repo_count in curated_accounts
    ]

    payload = {
        'trending_users': trending_users,
        'total': len(trending_users),
    }
    return jsonify(payload)

@app.route('/stats', methods=['GET'])
@limiter.limit("50 per hour")
def get_api_stats():
    """Describe the API: its advertised rate limit and feature set.

    All values are static descriptive strings; no counters are read.

    Returns:
        JSON object with ``total_requests``, ``rate_limit``, ``features``
        and ``supported_analytics``.
    """
    feature_names = [
        'GitHub profile analysis',
        'Commit pattern analysis',
        'Language usage statistics',
        'Activity heatmaps',
        'Developer personality insights',
        'Repository analytics',
    ]
    analytics_names = [
        'Commit frequency patterns',
        'Language preferences',
        'Activity time distribution',
        'Repository collaboration',
        'Developer personality types',
        'Productivity metrics',
    ]

    summary = {
        'total_requests': 'Tracked via rate limiting',
        'rate_limit': '30 requests per minute per IP',
        'features': feature_names,
        'supported_analytics': analytics_names,
    }
    return jsonify(summary)

def sanitize_username(username):
    """Validate a GitHub username and return it with outer whitespace removed.

    A valid name is 1-39 characters, starts with a letter or digit, and
    contains only letters, digits and single hyphens that are each followed
    by a letter or digit (so no leading, trailing or doubled hyphens).

    Args:
        username: Raw username string (may be empty or None).

    Returns:
        The stripped username when valid, otherwise None.
    """
    if username:
        trimmed = username.strip()
        username_pattern = r'^[a-zA-Z0-9](?:[a-zA-Z0-9]|-(?=[a-zA-Z0-9])){0,38}$'
        if re.match(username_pattern, trimmed):
            return trimmed
    return None

def debug_request(response):
    """Print a short diagnostic summary of an HTTP response to stdout.

    Shows the requested URL, the status code and the GitHub rate-limit
    headers; for any status other than 200 the first 200 characters of the
    response body are printed as well.

    Args:
        response: Response object exposing ``url``, ``status_code``,
            ``headers`` (mapping) and ``text``.
    """
    print(f"Request to {response.url}")
    print(f"Status: {response.status_code}")

    quota_left = response.headers.get('X-RateLimit-Remaining')
    quota_total = response.headers.get('X-RateLimit-Limit')
    print(f"Rate limit: {quota_left}/{quota_total}")

    if response.status_code == 200:
        return
    # Truncate the body so a large error page does not flood the log.
    print(f"Error response: {response.text[:200]}")

def get_all_pages(url, headers, max_pages=10):
    """Fetch a paginated GitHub list endpoint and return all items collected.

    Pages are followed through the response's ``next`` link until there is no
    further page, ``max_pages`` requests have been made, or an error occurs.
    This is best-effort: on any failure the items gathered so far are
    returned instead of raising.

    Rate-limit handling: on a 403 whose body mentions "rate limit exceeded"
    the ``X-RateLimit-Reset`` header is consulted. If the reset is more than
    5 minutes away (or already past / unknown) pagination stops; otherwise
    the function sleeps (at most 60 seconds) and retries the same URL. Retries
    count toward ``max_pages``.

    Args:
        url: First page URL.
        headers: HTTP headers sent with every request.
        max_pages: Upper bound on the number of HTTP requests made.

    Returns:
        list of the JSON items from every successfully fetched page.
    """
    items = []
    page_count = 0

    while url and page_count < max_pages:
        try:
            response = requests.get(url, headers=headers, timeout=15)
            debug_request(response)
            page_count += 1

            if response.status_code == 403 and 'rate limit exceeded' in response.text.lower():
                reset_time = int(response.headers.get('X-RateLimit-Reset', 0))
                wait_time = max(reset_time - time.time(), 0)
                if wait_time > 300:  # Don't wait more than 5 minutes
                    print("Rate limit exceeded, stopping pagination")
                    break
                if wait_time > 0:
                    time.sleep(min(wait_time, 60))  # Cap wait time at 1 minute
                    continue  # retry the same URL
                break

            if response.status_code != 200:
                print(f"API request failed with status {response.status_code}")
                break

            data = response.json()
            if not isinstance(data, list):
                print("Unexpected response format")
                break

            items.extend(data)

            # Prevent infinite loops
            next_url = response.links.get('next', {}).get('url')
            if next_url == url:  # Same URL indicates a problem
                print("Detected potential infinite loop, breaking")
                break
            url = next_url

            # Throttle only when another request will actually follow and
            # GitHub explicitly reported a nearly exhausted quota. A missing
            # header is not treated as "0 remaining".
            remaining = response.headers.get('X-RateLimit-Remaining')
            will_fetch_more = bool(url) and page_count < max_pages
            if will_fetch_more and remaining is not None and int(remaining) < 10:
                time.sleep(2)

        except requests.exceptions.RequestException as e:
            print(f"Request failed: {str(e)}")
            break
        except Exception as e:
            print(f"Unexpected error during pagination: {str(e)}")
            break

    # Report truncation only when there really was more to fetch.
    if url and page_count >= max_pages:
        print(f"Reached maximum page limit ({max_pages}), stopping pagination")

    return items

def get_language_color(language):
    """Return the hex display colour associated with a language name.

    Args:
        language: Language name as reported by GitHub (case-sensitive).

    Returns:
        The language's colour string, or ``'#6E40C9'`` for any language that
        is not in the table.
    """
    fallback_color = '#6E40C9'
    palette = {
        'Python': '#3572A5',
        'JavaScript': '#F1E05A',
        'TypeScript': '#3178C6',
        'Java': '#B07219',
        'C++': '#F34B7D',
        'Go': '#00ADD8',
        'Ruby': '#701516',
        'Swift': '#FFAC45',
        'Kotlin': '#A97BFF',
        'HTML': '#E34C26',
        'CSS': '#563D7C',
        'Jupyter Notebook': '#DA5B0B',
        'Shell': '#89E051',
        'PHP': '#4F5D95',
        'Rust': '#DEA584',
        'SCSS': '#C6538C',
        'Dart': '#00B4AB',
        'Elixir': '#6E4A7E',
        'Vue': '#41B883',
        'R': '#198CE7',
        'MATLAB': '#E16737',
    }
    if language in palette:
        return palette[language]
    return fallback_color

def normalize_language_percentages(languages):
    """Attach a ``percentage`` to each language entry so the values sum to 100.

    Every entry except the last gets its share of the total ``count`` rounded
    to one decimal; the last entry receives whatever is left of 100 so that
    rounding never makes the total drift.

    Args:
        languages: Sequence of dicts, each with a numeric ``count``.

    Returns:
        New list of dicts (inputs are not modified) with ``percentage``
        added, or an empty list when there are no entries or the counts sum
        to zero.
    """
    if not languages:
        return []

    grand_total = sum(entry['count'] for entry in languages)
    if grand_total == 0:
        return []

    last_index = len(languages) - 1
    leftover = 100.0
    result = []
    for position, entry in enumerate(languages):
        if position != last_index:
            share = round((entry['count'] / grand_total) * 100, 1)
            leftover -= share
        else:
            # The final entry absorbs the rounding remainder.
            share = round(leftover, 1)
        result.append(dict(entry, percentage=share))

    return result

def get_weekly_commits(commits, one_year_ago):
    """Bucket commits from the 365 days after *one_year_ago* into weekly counts.

    Args:
        commits: Iterable of GitHub commit objects, i.e. dicts carrying the
            author timestamp at ``commit['commit']['author']['date']`` in the
            form ``YYYY-MM-DDTHH:MM:SSZ``. Entries that are not dicts, lack
            those keys, hold ``None`` in their place, or have an unparsable
            date are skipped.
        one_year_ago: Naive ``datetime`` marking the start of the window.

    Returns:
        list of 52 ints; index 0 is the oldest week. Because 365 days is
        52 weeks plus one day, the final day of the window (offset 364) is
        counted in the last bucket rather than being dropped.
    """
    week_count = 52
    weekly_commits = [0] * week_count
    for commit in commits:
        if not (isinstance(commit, dict) and 'commit' in commit):
            continue
        try:
            date = datetime.strptime(commit['commit']['author']['date'], '%Y-%m-%dT%H:%M:%SZ')
        except (ValueError, KeyError, TypeError):
            # Malformed or missing timestamp: ignore this commit.
            continue
        days_since = (date - one_year_ago).days
        if 0 <= days_since < 365:
            # Clamp so day 364 (which would be "week 52") lands in week 51.
            weekly_commits[min(days_since // 7, week_count - 1)] += 1
    return weekly_commits

def analyze_commit_sentiment(commits):
    """Summarise the tone and intent of a set of commit messages.

    For every message the TextBlob polarity is bucketed as positive (> 0.2),
    negative (< -0.2) or neutral; the message is classified into one commit
    type by the first keyword group it contains (substring match, checked in
    the order bugfix, feature, refactor, docs, chore, else "other"); and its
    alphabetic words longer than three characters are tallied. A message
    whose analysis raises is skipped, but still counts toward the average's
    denominator.

    Args:
        commits: Iterable of GitHub commit objects (dicts with
            ``commit['commit']['message']``); other entries are ignored.

    Returns:
        None when no messages are found, otherwise a dict with ``positive``,
        ``neutral``, ``negative``, ``average_polarity`` (rounded to 2
        decimals), ``common_words`` (top 10 by frequency) and
        ``commit_types``.
    """
    messages = [
        entry['commit']['message']
        for entry in commits
        if isinstance(entry, dict) and 'commit' in entry
    ]
    if not messages:
        return None

    report = {
        'positive': 0,
        'neutral': 0,
        'negative': 0,
        'average_polarity': 0,
        'common_words': {},
        'commit_types': {
            'feature': 0,
            'bugfix': 0,
            'refactor': 0,
            'docs': 0,
            'chore': 0,
            'other': 0
        }
    }

    # Checked top to bottom; the first group with a matching keyword wins.
    type_rules = (
        ('bugfix', ('fix', 'bug', 'error', 'issue')),
        ('feature', ('add', 'implement', 'feature', 'feat')),
        ('refactor', ('refactor', 'clean', 'optimize', 'improve')),
        ('docs', ('doc', 'readme', 'comment', 'wiki')),
        ('chore', ('chore', 'update', 'bump', 'merge')),
    )
    ignored_words = {"the", "and", "a", "an", "in", "on", "at", "to", "of", "for"}

    frequencies = defaultdict(int)
    polarity_total = 0

    for text in messages:
        try:
            polarity = TextBlob(text).sentiment.polarity
            polarity_total += polarity

            if polarity > 0.2:
                tone = 'positive'
            elif polarity < -0.2:
                tone = 'negative'
            else:
                tone = 'neutral'
            report[tone] += 1

            lowered = text.lower()
            commit_type = next(
                (label for label, keywords in type_rules
                 if any(keyword in lowered for keyword in keywords)),
                'other',
            )
            report['commit_types'][commit_type] += 1

            for token in re.findall(r'\b\w+\b', lowered):
                if token in ignored_words:
                    continue
                if len(token) > 3 and token.isalpha():
                    frequencies[token] += 1

        except Exception:
            continue

    report['average_polarity'] = round(polarity_total / len(messages), 2)
    ranked = sorted(frequencies.items(), key=lambda pair: pair[1], reverse=True)
    report['common_words'] = dict(ranked[:10])
    return report

def _compute_streaks(days_active, today, window=365):
    """Return ``(current_streak, longest_streak)`` in days within *window* days.

    ``days_active`` is a set of ``date`` objects. The longest streak is the
    longest run of consecutive active days among the *window* days ending at
    *today*. The current streak is the run that ends today, or that ends
    yesterday when today has no activity yet.
    """
    longest = run = 0
    for offset in range(window):
        if today - timedelta(days=offset) in days_active:
            run += 1
            longest = max(longest, run)
        else:
            run = 0

    # Walk back from the most recent day until the first gap.
    first_offset = 0 if today in days_active else 1
    current = 0
    for offset in range(first_offset, window):
        if today - timedelta(days=offset) not in days_active:
            break
        current += 1

    return current, longest


@app.route('/analyze/<username>', methods=['GET'])
@limiter.limit("30 per minute")
def analyze_github(username):
    """Build the GitRecap report for a GitHub user.

    Fetches the user's profile and repositories, then inspects the 10 most
    recently pushed repositories: primary language of each non-fork, and the
    user's commits from the last 365 days (UTC). From that it derives
    language shares, weekly commit counts, streaks, commit-time distribution,
    commit-message sentiment and a few heuristic "insight" labels.

    Query parameters:
        token: Optional GitHub token used for the upstream API calls.

    Returns:
        JSON report with ``profile``, ``stats`` and (when commit messages
        exist) ``sentiment``. Error responses are JSON ``{'error': ...}``
        with status 400 (invalid username), 404 (unknown user / no public
        repos), 429 (GitHub answered 403), 502 (GitHub unreachable), the
        upstream status for other GitHub errors, or 500 for unexpected
        failures.
    """
    # Imported here so this handler does not depend on the module import list.
    from datetime import timezone

    try:
        username = sanitize_username(username)
        if not username:
            return jsonify({'error': 'Invalid username format'}), 400

        # NOTE(security): the token is read from the query string, where it
        # can end up in access logs and browser history. Kept for backward
        # compatibility with existing clients.
        user_token = request.args.get('token')
        headers = get_headers(user_token)

        try:
            api_status = requests.get('https://api.github.com', headers=headers, timeout=5)
            if api_status.status_code != 200:
                return jsonify({'error': 'GitHub API unavailable'}), 502
        except requests.exceptions.RequestException:
            return jsonify({'error': 'GitHub API unavailable'}), 502

        try:
            user_response = requests.get(f'https://api.github.com/users/{username}', headers=headers, timeout=10)
            if user_response.status_code == 404:
                return jsonify({'error': 'User not found'}), 404
            if user_response.status_code == 403:
                return jsonify({'error': 'GitHub API rate limit exceeded'}), 429
            if user_response.status_code != 200:
                return jsonify({'error': 'GitHub API error'}), user_response.status_code
        except requests.exceptions.RequestException:
            return jsonify({'error': 'Failed to fetch user data'}), 502

        user_data = user_response.json()
        repos = get_all_pages(f'https://api.github.com/users/{username}/repos?per_page=100&sort=pushed', headers, max_pages=5)
        if not repos:
            return jsonify({'error': 'No public repositories found'}), 404

        # Commit timestamps from GitHub are UTC, so the analysis window is
        # computed in (naive) UTC as well instead of server-local time.
        now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
        one_year_ago = now_utc - timedelta(days=365)
        # GitHub documents `since` as YYYY-MM-DDTHH:MM:SSZ (no microseconds).
        since_param = one_year_ago.strftime('%Y-%m-%dT%H:%M:%SZ')

        language_counts = defaultdict(int)
        days_active = set()
        commit_time_distribution = [0] * 24
        date_count = defaultdict(int)
        all_commits = []

        # Limit to top 10 most recently pushed repos for MVP
        repos_to_analyze = repos[:10]
        for repo in repos_to_analyze:
            if not repo.get('fork', False):
                try:
                    langs_response = requests.get(repo['languages_url'], headers=headers, timeout=10)
                    if langs_response.status_code == 200:
                        repo_langs = langs_response.json()
                        if repo_langs:
                            # The language with the most bytes represents the repo.
                            primary_lang = max(repo_langs.items(), key=lambda x: x[1])[0]
                            language_counts[primary_lang] += 1
                except requests.exceptions.RequestException as e:
                    # A failed language lookup must not discard this repo's
                    # commits, so fall through to the commit collection.
                    print(f"Language lookup failed for {repo.get('name')}: {str(e)}")

            try:
                commits_url = f"{repo['url']}/commits?since={since_param}&author={username}&per_page=100"
                commits = get_all_pages(commits_url, headers, max_pages=3)  # Limit to 300 commits per repo max
                all_commits.extend(commits)

                for commit in commits:
                    if isinstance(commit, dict) and 'commit' in commit:
                        try:
                            date = datetime.strptime(commit['commit']['author']['date'], '%Y-%m-%dT%H:%M:%SZ')
                        except (ValueError, KeyError, TypeError):
                            continue
                        days_active.add(date.date())
                        commit_time_distribution[date.hour] += 1
                        date_count[date.date().isoformat()] += 1
            except requests.exceptions.RequestException:
                continue

        weekly_commits = get_weekly_commits(all_commits, one_year_ago)

        top_languages = []
        if language_counts:
            top_languages = sorted([
                {
                    'name': lang,
                    'count': count,
                    'color': get_language_color(lang)
                }
                for lang, count in language_counts.items()
            ], key=lambda x: -x['count'])

            top_languages = normalize_language_percentages(top_languages)

        current_streak, max_streak = _compute_streaks(days_active, now_utc.date())

        total_commits = sum(commit_time_distribution)
        # Night owl: more than 40% of commits between 22:00 and 03:59 (UTC).
        night_owl = total_commits > 0 and sum(commit_time_distribution[22:] + commit_time_distribution[:4]) > total_commits * 0.4
        # Weekend warrior: more than 30% of active days fall on Sat/Sun.
        weekend_warrior = len(days_active) > 0 and sum(1 for date in days_active if date.weekday() >= 5) > len(days_active) * 0.3
        developer_personality = "Night Owl" if night_owl else "Weekend Warrior" if weekend_warrior else "Consistent Contributor"

        own_repos = [repo for repo in repos if not repo.get('fork', False)]
        top_repos = sorted(
            own_repos,
            key=lambda x: x.get('stargazers_count', 0),
            reverse=True
        )[:5]

        favorite_language = top_languages[0]['name'] if top_languages else "None"
        sentiment = analyze_commit_sentiment(all_commits)

        # Calculate additional insights
        total_repos = len(own_repos)
        total_forks = len(repos) - total_repos
        total_stars = sum(repo.get('stargazers_count', 0) for repo in repos)
        total_watchers = sum(repo.get('watchers_count', 0) for repo in repos)

        # Activity patterns
        active_hours = [i for i, count in enumerate(commit_time_distribution) if count > 0]
        most_active_hour = max(range(24), key=lambda x: commit_time_distribution[x]) if commit_time_distribution else 0
        total_active_days = len(days_active)

        # Repository insights
        repo_sizes = [repo.get('size', 0) for repo in own_repos]
        avg_repo_size = sum(repo_sizes) / len(repo_sizes) if repo_sizes else 0

        # Collaboration patterns: share of the user's own (non-fork) repos
        # that other people have forked. Numerator and denominator use the
        # same population so the score stays within 0-100.
        collaboration_score = 0
        if total_repos > 0:
            repos_with_contributors = sum(1 for repo in own_repos if repo.get('forks_count', 0) > 0)
            collaboration_score = (repos_with_contributors / total_repos) * 100

        response_data = {
            'profile': {
                'username': username,
                'avatar_url': user_data.get('avatar_url'),
                'join_date': (user_data.get('created_at') or '')[:10],
                'name': user_data.get('name'),
                'bio': user_data.get('bio'),
                'location': user_data.get('location'),
                'company': user_data.get('company'),
                'blog': user_data.get('blog'),
                'twitter_username': user_data.get('twitter_username')
            },
            'stats': {
                'repos': total_repos,
                'forks': total_forks,
                'stars': total_stars,
                'watchers': total_watchers,
                'followers': user_data.get('followers', 0),
                'following': user_data.get('following', 0),
                'languages': top_languages,
                'avg_repo_size': round(avg_repo_size, 1),
                'collaboration_score': round(collaboration_score, 1),
                'activity': {
                    'weekly_commits': weekly_commits,
                    'streak': max_streak,
                    'current_streak': current_streak,
                    'total_active_days': total_active_days,
                    'commit_time_distribution': commit_time_distribution,
                    'most_active_hour': most_active_hour,
                    'active_hours': active_hours,
                    'contribution_data': [{'date': d, 'count': c} for d, c in date_count.items()],
                    'top_repos': [{
                        'name': repo['name'],
                        'stars': repo.get('stargazers_count', 0),
                        'forks': repo.get('forks_count', 0),
                        'description': repo.get('description', ''),
                        'url': repo['html_url'],
                        'language': repo.get('language'),
                        'size': repo.get('size', 0),
                        'updated_at': repo.get('updated_at')
                    } for repo in top_repos]
                },
                'developer_personality': developer_personality,
                'longest_streak': max_streak,
                'favorite_language': favorite_language,
                'insights': {
                    'productivity_score': round((total_active_days / 365) * 100, 1) if total_active_days > 0 else 0,
                    'consistency_score': round((max_streak / 365) * 100, 1) if max_streak > 0 else 0,
                    'collaboration_level': 'High' if collaboration_score > 50 else 'Medium' if collaboration_score > 20 else 'Low',
                    'activity_pattern': 'Night Owl' if most_active_hour >= 22 or most_active_hour <= 4 else 'Early Bird' if most_active_hour <= 8 else 'Day Developer',
                    'project_focus': 'Open Source' if total_stars > 100 else 'Personal Projects' if total_repos > 10 else 'Professional',
                    'experience_level': 'Veteran' if total_repos > 50 else 'Experienced' if total_repos > 20 else 'Intermediate' if total_repos > 5 else 'Beginner'
                }
            }
        }

        if sentiment:
            response_data['sentiment'] = sentiment

        return jsonify(response_data)

    except requests.exceptions.RequestException as e:
        print(f"Network error: {str(e)}")
        return jsonify({'error': 'Network error'}), 502
    except Exception as e:
        # Last-resort boundary: log and hide internals from the client.
        print(f"Unexpected error: {str(e)}")
        return jsonify({'error': 'Server error'}), 500

if __name__ == '__main__':
    # Entry point when the module is executed directly. PORT and HOST come
    # from the environment, defaulting to 5000 on all interfaces.
    port = int(os.getenv('PORT', 5000))
    host = os.getenv('HOST', '0.0.0.0')
    if os.getenv('FLASK_ENV') == 'production':
        # Production: serve through waitress; imported lazily so it is only
        # required when this branch runs.
        from waitress import serve
        serve(app, host=host, port=port, threads=4)
    else:
        # The interactive debugger lets anyone who can reach the server run
        # arbitrary code, and the default host listens on every interface.
        # Debug mode is therefore opt-in (FLASK_DEBUG=1) instead of always on.
        debug = os.getenv('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
        app.run(host=host, port=port, debug=debug)