-
Notifications
You must be signed in to change notification settings - Fork 19
Expand file tree
/
Copy pathutils.py
More file actions
247 lines (203 loc) · 7.47 KB
/
utils.py
File metadata and controls
247 lines (203 loc) · 7.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
import git
import git.exc
import hashlib
import logging
import os
import time
import re
import sys
from pathlib import Path
from typing import List, Optional, Union
from fastcore.net import HTTP404NotFoundError, HTTP403ForbiddenError # type: ignore
from ghapi.core import GhApi
class EvaluationError(Exception):
    """Error raised when evaluating a repository fails.

    The string form names the repository, repeats the original message and
    points the reader at the log file associated with the logger, when the
    logger carries one.

    Parameters
    ----------
    repo : str
        Name of the repository being evaluated.
    message : str
        Description of what went wrong.
    logger : logging.Logger
        Logger in use when the error occurred. If it has a ``log_file``
        attribute (``setup_logger`` sets one), that value is reported in the
        error message.
    """

    def __init__(self, repo: str, message: str, logger: logging.Logger):
        super().__init__(message)
        self.super_str = super().__str__()
        self.repo = repo
        # Not every logger has a ``log_file`` attribute (it is attached with
        # setattr by ``setup_logger``), so fall back to an empty string
        # instead of raising AttributeError while building an error.
        self.log_file = getattr(logger, "log_file", "")
        self.logger = logger

    def __str__(self):
        return (
            f"Evaluation error for {self.repo}: {self.super_str}\n"
            f"Check ({self.log_file}) for more information."
        )
def setup_logger(
    repo: str, log_file: Path, mode: str = "w", verbose: int = 1
) -> logging.Logger:
    """Create a file-backed logger for image builds and container runs.

    The logger is named ``"{repo}.{log_file.name}"``, writes INFO-and-above
    records to ``log_file`` (parent directories are created as needed) and
    does not propagate to ancestor loggers. With ``verbose == 2`` the same
    records are also echoed to stdout. The log path is attached to the
    returned logger as its ``log_file`` attribute.
    """
    log_file.parent.mkdir(parents=True, exist_ok=True)
    record_format = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")

    # The file sink is always present; stdout is opt-in via verbose == 2.
    sinks = [logging.FileHandler(log_file, mode=mode)]
    if verbose == 2:
        sinks.append(logging.StreamHandler(sys.stdout))

    log = logging.getLogger(f"{repo}.{log_file.name}")
    for sink in sinks:
        sink.setFormatter(record_format)
        log.addHandler(sink)

    log.setLevel(logging.INFO)
    log.propagate = False
    setattr(log, "log_file", log_file)
    return log
def close_logger(logger: logging.Logger) -> None:
    """Close and detach every handler of ``logger``.

    Releasing the handlers frees their underlying resources (e.g. open log
    files), which avoids running into "too many open files".
    """
    # Iterate over a snapshot: removeHandler() mutates logger.handlers, and
    # removing from a list while iterating it skips every other element,
    # which would leave some handlers open.
    for handler in list(logger.handlers):
        handler.close()
        logger.removeHandler(handler)
def get_hash_string(input_string: str) -> str:
    """Return the first 22 hex characters of the SHA-256 of ``input_string``.

    The string is encoded as UTF-8 before hashing.
    """
    digest = hashlib.sha256(input_string.encode("utf-8")).hexdigest()
    return digest[:22]
def extract_test_output(ss: str, pattern: str) -> str:
    """Return the output printed between a traced command and the next one.

    ``ss`` is a log in which command lines start with ``"+"``. Capture starts
    at the first command line containing ``pattern`` and stops at the next
    command line that does not contain ``pattern``. The text in between (the
    starting command line itself excluded) is returned, stripped. If no
    command matches, or no terminating command line follows, the result is
    an empty string.
    """
    lines = ss.split("\n")

    # Phase 1: locate the command line that launches the test run.
    start = next(
        (
            idx
            for idx, line in enumerate(lines)
            if line.startswith("+") and pattern in line
        ),
        None,
    )
    if start is None:
        return ""

    # Phase 2: the next command line that does not itself match the pattern
    # marks the end of the test output.
    for end in range(start + 1, len(lines)):
        candidate = lines[end]
        if candidate.startswith("+") and pattern not in candidate:
            return "\n".join(lines[start + 1 : end]).strip()

    # The output was never terminated by a following command.
    return ""
def clone_repo(
    clone_url: str, clone_dir: str, branch: str, logger: logging.Logger
) -> git.Repo:
    """Clone repo into the specified directory if it does not already exist.

    If ``clone_dir`` already exists, it is opened as a repository and the
    latest changes are fetched instead of cloning. In both cases ``branch``
    is checked out afterwards.

    Parameters
    ----------
    clone_url : str
        URL of the repository to clone.
    clone_dir : str
        Directory where the repository will be cloned.
    branch : str
        The branch/tag name to checkout.
    logger : logging.Logger
        The logger object.

    Returns
    -------
    git.Repo
        The cloned repository object.

    Raises
    ------
    RuntimeError
        If fetching, cloning or checking out fails with a git command error.
        The original ``GitCommandError`` is attached as ``__cause__``.

    """
    # Check if the repository already exists
    if os.path.exists(clone_dir):
        logger.info(f"Repository already exists at {clone_dir}. Fetching updates.")
        try:
            repo = git.Repo(clone_dir)
            repo.git.fetch()
        except git.exc.GitCommandError as e:
            raise RuntimeError(f"Failed to fetch updates for repository: {e}") from e
    else:
        logger.info(f"Cloning {clone_url} into {clone_dir}")
        try:
            repo = git.Repo.clone_from(clone_url, clone_dir)
        except git.exc.GitCommandError as e:
            raise RuntimeError(f"Failed to clone repository: {e}") from e

    try:
        repo.git.checkout(branch)
    except git.exc.GitCommandError as e:
        raise RuntimeError(f"Failed to check out {branch}: {e}") from e

    return repo
def create_repo_on_github(
    organization: str, repo: str, logger: logging.Logger, token: Optional[str] = None
) -> None:
    """Make sure ``organization/repo`` exists on GitHub, creating it if needed.

    The repository is looked up first. A 404 response triggers creation
    inside the organization; a 403 response is treated as a rate-limit hit
    and the lookup is retried once the core rate limit reports remaining
    calls.

    Parameters
    ----------
    organization : str
        GitHub organization that owns (or will own) the repository.
    repo : str
        Repository name.
    logger : logging.Logger
        Logger used to report progress.
    token : Optional[str]
        GitHub token passed to ``GhApi``.

    """
    api = GhApi(token=token)
    while True:
        try:
            api.repos.get(owner=organization, repo=repo)  # type: ignore
            logger.info(f"{organization}/{repo} already exists")
            break
        except HTTP403ForbiddenError:
            # NOTE(review): a 403 that is not caused by rate limiting leaves
            # `remaining > 0`, so the lookup is retried immediately and
            # indefinitely — confirm whether that case needs handling.
            while True:
                rl = api.rate_limit.get()  # type: ignore
                logger.info(
                    f"Rate limit exceeded for the current GitHub token, "
                    f"waiting for 5 minutes, remaining calls: {rl.resources.core.remaining}"
                )
                if rl.resources.core.remaining > 0:
                    break
                time.sleep(60 * 5)
        except HTTP404NotFoundError:
            api.repos.create_in_org(org=organization, name=repo)  # type: ignore
            logger.info(f"Created {organization}/{repo} on GitHub")
            break
def generate_patch_between_commits(
    repo: git.Repo, old_commit: str, new_commit: str
) -> str:
    """Generate a patch string by comparing two specified commits.

    The diff covers the whole repository except ``spec.pdf.bz2``, and two
    trailing newlines are appended to the result.

    Args:
    ----
        repo (git.Repo): An instance of the git.Repo object representing the repository.
        old_commit (str): The hash or reference to the old commit.
        new_commit (str): The hash or reference to the new commit.

    Returns:
    -------
        patch (str): A string containing the patch in the diff format between the two commits

    Raises:
    ------
        Exception: If there is an error while running git commands. The
            original ``git.GitCommandError`` is attached as ``__cause__``.

    """
    try:
        patch = repo.git.diff(
            old_commit, new_commit, "--", ".", ":(exclude)spec.pdf.bz2"
        )
        return patch + "\n\n"
    except git.GitCommandError as e:
        raise Exception(f"Error generating patch: {e}") from e
def get_active_branch(repo_path: Union[str, Path]) -> str:
    """Retrieve the current active branch of a Git repository.

    Args:
    ----
        repo_path (Union[str, Path]): The path to git repo.

    Returns:
    -------
        str: The name of the active branch.

    Raises:
    ------
        Exception: If looking up the active branch raises ``TypeError``,
            which this function reports as a detached HEAD state. The
            original ``TypeError`` is attached as ``__cause__``.

    """
    repo = git.Repo(repo_path)
    try:
        # Get the current active branch
        branch = repo.active_branch.name
    except TypeError as e:
        # "{branch}" in the second literal is a plain placeholder shown to
        # the user, not an f-string field.
        raise Exception(
            f"{e}\nThis means the repository is in a detached HEAD state. "
            "To proceed, please specify a valid branch by using --branch {branch}."
        ) from e

    return branch
def extract_code_blocks(text: str) -> List[str]:
    """Collect the contents of every ```python fenced block in ``text``.

    A block starts with a ```python marker immediately followed by a newline
    and ends at the next ``` marker. Fences using any other language tag are
    ignored.

    Args:
    ----
        text (str): The input text containing Python code blocks marked with
                    ```python ... ```.

    Returns:
    -------
        List[str]: The body of each block, in order of appearance, with
                   surrounding whitespace stripped.

    """
    fenced_python = re.compile(r"```python\n(.*?)```", re.DOTALL)
    # With a single capture group, findall yields the captured bodies directly.
    return [body.strip() for body in fenced_python.findall(text)]
# An empty __all__ means `from <this module> import *` exports no names;
# callers have to import what they need explicitly.
__all__ = []