-
Notifications
You must be signed in to change notification settings - Fork 11
Start consolidating GitHub REST API URLs into a central place #301
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
510fb98
53dc529
cd8aa72
0e803f8
d04b8c1
09a89c7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -227,11 +227,19 @@ def collect_pull_request_review_comments(repo_git: str, full_collection: bool) - | |
| """ | ||
| owner, repo = get_owner_repo(repo_git) | ||
|
|
||
| review_msg_url = f"https://api.github.com/repos/{owner}/{repo}/pulls/comments" | ||
|
|
||
| logger = logging.getLogger(collect_pull_request_review_comments.__name__) | ||
| logger.debug(f"Collecting pull request review comments for {owner}/{repo}") | ||
|
|
||
| tool_source = "Pr review comment task" | ||
| tool_version = "2.0" | ||
| data_source = "Github API" | ||
|
|
||
| key_auth = GithubRandomKeyAuth(logger) | ||
| github_data_access = GithubDataAccess(key_auth, logger) | ||
|
|
||
| review_msg_search_args = {} | ||
|
|
||
| repo_id = get_repo_by_repo_git(repo_git).repo_id | ||
|
|
||
| if not full_collection: | ||
|
|
@@ -240,7 +248,7 @@ def collect_pull_request_review_comments(repo_git: str, full_collection: bool) - | |
| if last_collected_date: | ||
| # Subtract 2 days to ensure all data is collected | ||
| core_data_last_collected = (last_collected_date - timedelta(days=2)).replace(tzinfo=timezone.utc) | ||
| review_msg_url += f"?since={core_data_last_collected.isoformat()}" | ||
| review_msg_search_args['since'] = core_data_last_collected.isoformat() | ||
| else: | ||
| logger.warning(f"core_data_last_collected is NULL for recollection on repo: {repo_git}") | ||
|
|
||
|
|
@@ -253,13 +261,6 @@ def collect_pull_request_review_comments(repo_git: str, full_collection: bool) - | |
| logger.debug(f"{owner}/{repo} No PR reviews to collect review comments for") | ||
| return | ||
|
|
||
| tool_source = "Pr review comment task" | ||
| tool_version = "2.0" | ||
| data_source = "Github API" | ||
|
|
||
| key_auth = GithubRandomKeyAuth(logger) | ||
| github_data_access = GithubDataAccess(key_auth, logger) | ||
|
|
||
| pr_review_comment_batch_size = get_batch_size() | ||
|
|
||
| # Batch processing: accumulate comments until batch size reached, then flush | ||
|
|
@@ -268,6 +269,8 @@ def collect_pull_request_review_comments(repo_git: str, full_collection: bool) - | |
| pr_review_msg_mapping_data = {} | ||
| total_refs_inserted = 0 | ||
|
|
||
| review_msg_url = github_data_access.endpoint_url(f"repos/{owner}/{repo}/pulls/comments", review_msg_search_args) | ||
|
|
||
| # Single-pass extraction: get both contributor and comment data together | ||
| for comment in github_data_access.paginate_resource(review_msg_url): | ||
| # Extract contributor | ||
|
|
@@ -512,7 +515,7 @@ def collect_pull_request_reviews(repo_git: str, full_collection: bool) -> None: | |
| if index % 100 == 0: | ||
| logger.debug(f"{owner}/{repo} Processing PR {index + 1} of {pr_count}") | ||
|
|
||
| pr_review_url = f"https://api.github.com/repos/{owner}/{repo}/pulls/{pr_number}/reviews" | ||
| pr_review_url = github_data_access.endpoint_url(f"repos/{owner}/{repo}/pulls/{pr_number}/reviews") | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This invocation doesn't start with `/`, but the one in the next file below does (https://github.com/chaoss/CollectOSS/pull/301/changes#diff-383f6314669328d7cd3e867c6c988884aab0fe09a5e74c8444dcf12d925104d0R22). It might be worth making sure this is documented (or confirming whether either format can be handled properly) so that it can be invoked consistently. |
||
|
|
||
| try: | ||
| pr_reviews = list(github_data_access.paginate_resource(pr_review_url)) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[pylint] reported by reviewdog 🐶
E0602: Undefined variable 'repo' (undefined-variable)