Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 143 additions & 20 deletions datasources/requests/taiga_api_call.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,18 @@
import requests
from datetime import datetime, timedelta, timezone
from utils.taiga_token.taiga_auth import get_taiga_token
from config.credentials_loader import (
CredentialsConfigError,
ProjectCredentialsNotFoundError,
resolve,
)

from config.settings import TAIGA_API_URL
from config.credentials_loader import resolve
from config.settings import TAIGA_API_URL, TAIGA_TOKEN

_CACHE = {} # key = (project_id, milestone_id) -> (timestamp, stats)
_DETAILS_CACHE = {} # key = (project_id, milestone_id) -> (timestamp, details)
_USERSTORY_CACHE = {} # key = (project_id, userstory_id) -> (timestamp, details)
_US_CUSTOM_ATTR_NAMES_CACHE = {} # key = project_id -> (timestamp, {attr_id(str): attr_name})
_TASK_CUSTOM_ATTR_NAMES_CACHE = {} # key = project_id -> (timestamp, {attr_id(str): attr_name})
TTL = timedelta(minutes=1) # Cache time-to-live: 1 minute. A cache entry younger than this is returned as-is instead of making a new API call.
logger = logging.getLogger(__name__)

_CACHE = {} # key = (project_id, milestone_id) -> (timestamp, stats)
MILESTONE_TIMEOUT = (3, 8)
TAIGA_LOOKUP_ERRORS = (
requests.exceptions.RequestException,
CredentialsConfigError,
ProjectCredentialsNotFoundError,
)
log = logging.getLogger(__name__)
logger = log
TAIGA_LOOKUP_ERRORS = (requests.RequestException,)


def _empty_stats():
Expand All @@ -36,18 +27,122 @@
}



def _build_taiga_headers(prj: str):
"""Return the Taiga headers needed for public and private deployments."""
if "api.taiga.io" in TAIGA_API_URL:
"""Return Taiga headers for public, private and SSO deployments."""
if TAIGA_TOKEN:
return {"Authorization": f"Bearer {TAIGA_TOKEN}"}

try:
user = resolve(prj, "taiga_user")
psw = resolve(prj, "taiga_password")
if user and psw:
except KeyError:
log.warning("No Taiga credentials configured for project %s; using anonymous requests.", prj)

Check warning on line 39 in datasources/requests/taiga_api_call.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Change this code to not log user-controlled data.

See more on https://sonarcloud.io/project/issues?id=Learning-Dashboard_LD_Connect_Event&issues=AZ22eFtbczfuCFFt2VKN&open=AZ22eFtbczfuCFFt2VKN&pullRequest=21
return {}

if user and psw:
try:
token = get_taiga_token(user, psw)
return {"Authorization": f"Bearer {token}"}
except requests.RequestException as exc:
log.warning("Failed to fetch Taiga token for project %s: %s", prj, exc)
return {}

log.warning("Incomplete Taiga credentials for project %s; using anonymous requests.", prj)

Check warning on line 50 in datasources/requests/taiga_api_call.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Change this code to not log user-controlled data.

See more on https://sonarcloud.io/project/issues?id=Learning-Dashboard_LD_Connect_Event&issues=AZ22eFtbczfuCFFt2VKO&open=AZ22eFtbczfuCFFt2VKO&pullRequest=21
return {}


def _userstory_custom_attribute_names(project_id: str, prj: str):
"""Return map of custom attribute id -> name for userstories in a project."""
if not project_id:
return {}

key = str(project_id)
now = datetime.now(timezone.utc)
if key in _US_CUSTOM_ATTR_NAMES_CACHE and now - _US_CUSTOM_ATTR_NAMES_CACHE[key][0] < TTL:
return _US_CUSTOM_ATTR_NAMES_CACHE[key][1]

headers = _build_taiga_headers(prj)
url = f"{TAIGA_API_URL}/userstory-custom-attributes"
try:
r = requests.get(url, params={"project": project_id}, headers=headers, timeout=(1, 5))
r.raise_for_status()
mapping = {str(item.get("id")): item.get("name") for item in (r.json() or []) if item.get("id") and item.get("name")}
except requests.RequestException as exc:
log.warning("Failed to fetch userstory custom attribute definitions for project %s: %s", project_id, exc)
mapping = {}

_US_CUSTOM_ATTR_NAMES_CACHE[key] = (now, mapping)
return mapping


def _userstory_custom_values(project_id: str, userstory_id: str, prj: str):
"""Fetch custom attribute values for a userstory and map IDs to attribute names."""
if not project_id or not userstory_id:
return {}

headers = _build_taiga_headers(prj)
url = f"{TAIGA_API_URL}/userstories/custom-attributes-values/{userstory_id}"
try:
r = requests.get(url, params={"project": project_id}, headers=headers, timeout=(1, 5))
r.raise_for_status()
raw_values = (r.json() or {}).get("attributes_values") or {}
except requests.RequestException as exc:
log.warning("Failed to fetch custom values for userstory %s in project %s: %s", userstory_id, project_id, exc)
return {}

names = _userstory_custom_attribute_names(project_id, prj)
mapped = {}
for attr_id, value in raw_values.items():
mapped[names.get(str(attr_id), str(attr_id))] = value
return mapped


def _task_custom_attribute_names(project_id: str, prj: str):
"""Return map of custom attribute id -> name for tasks in a project."""
if not project_id:
return {}

key = str(project_id)
now = datetime.now(timezone.utc)
if key in _TASK_CUSTOM_ATTR_NAMES_CACHE and now - _TASK_CUSTOM_ATTR_NAMES_CACHE[key][0] < TTL:
return _TASK_CUSTOM_ATTR_NAMES_CACHE[key][1]

headers = _build_taiga_headers(prj)
url = f"{TAIGA_API_URL}/task-custom-attributes"
try:
r = requests.get(url, params={"project": project_id}, headers=headers, timeout=(1, 5))
r.raise_for_status()
mapping = {str(item.get("id")): item.get("name") for item in (r.json() or []) if item.get("id") and item.get("name")}
except requests.RequestException as exc:
log.warning("Failed to fetch task custom attribute definitions for project %s: %s", project_id, exc)
mapping = {}

_TASK_CUSTOM_ATTR_NAMES_CACHE[key] = (now, mapping)
return mapping


def _task_custom_values(project_id: str, task_id: str, prj: str):
"""Fetch custom attribute values for a task and map IDs to attribute names."""
if not project_id or not task_id:
return {}

headers = _build_taiga_headers(prj)
url = f"{TAIGA_API_URL}/tasks/custom-attributes-values/{task_id}"
try:
r = requests.get(url, params={"project": project_id}, headers=headers, timeout=(1, 5))
r.raise_for_status()
raw_values = (r.json() or {}).get("attributes_values") or {}
except requests.RequestException as exc:
log.warning("Failed to fetch custom values for task %s in project %s: %s", task_id, project_id, exc)
return {}

names = _task_custom_attribute_names(project_id, prj)
mapped = {}
for attr_id, value in raw_values.items():
mapped[names.get(str(attr_id), str(attr_id))] = value
return mapped


def milestone_details(project_id: str, milestone_id: str, prj: str):
"""
Fetches the milestone metadata from Taiga.
Expand Down Expand Up @@ -94,6 +189,7 @@
"""
Fetches the userstory metadata from Taiga.
Used as a fallback when task payloads do not include the nested userstory state.
Also returns custom_attributes and description for recovery backfill.
"""
if not project_id or not userstory_id:
return {}
Expand All @@ -120,12 +216,39 @@
return {}

js = r.json()
custom_values = js.get("custom_attributes_values") or _userstory_custom_values(project_id, userstory_id, prj)
details = {
"userstory_is_closed": (js.get("status_extra_info") or {}).get("is_closed"),
"custom_attributes_values": custom_values or {},
"description": js.get("description") or "",
}
_USERSTORY_CACHE[key] = (now, details)
return details

def task_details(project_id: str, task_id: str, prj: str):
    """Fetch task metadata from Taiga.

    Args:
        project_id: Taiga project id.
        task_id: Taiga task id.
        prj: Project key used to build the Taiga request headers.

    Returns:
        ``{"custom_attributes_values": {...}}`` on success. The values come
        from the task payload when present there, otherwise from the dedicated
        custom-attributes-values endpoint. Returns ``{}`` when either id is
        missing, the request fails, or the response is not a JSON object.
    """
    if not project_id or not task_id:
        return {}

    headers = _build_taiga_headers(prj)
    url = f"{TAIGA_API_URL}/tasks/{task_id}"
    try:
        r = requests.get(url, params={"project": project_id}, headers=headers, timeout=(1, 5))
        r.raise_for_status()
        js = r.json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers JSON decoding errors that are not raised as a
        # RequestException subclass by older versions of requests.
        log.warning("Failed to fetch task %s in project %s: %s", task_id, project_id, exc)
        return {}

    if not isinstance(js, dict):
        # A 2xx response with a non-object body must not crash the caller.
        log.warning("Failed to fetch task %s in project %s: %s", task_id, project_id, "unexpected payload")
        return {}

    # Prefer values embedded in the task payload; fall back to the dedicated
    # endpoint only when the payload has none.
    custom_values = js.get("custom_attributes_values") or _task_custom_values(project_id, task_id, prj)
    return {"custom_attributes_values": custom_values or {}}

def milestone_stats(project_id: str, milestone_id: str, prj: str):
"""
Fetches the statistics of a milestone in a Taiga project.
Expand Down
8 changes: 4 additions & 4 deletions utils/pattern_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@ class PatternDetector:
# Patrones regex compilados para optimización
PATTERNS = [
# English
r"\bas\s+[\w\s]+\s+i\s+want\s+[\w\s,.:;!?-]+\s+so\s+that\s+[\w\s,.:;!?-]+",
r"\bas\s+[\w\s]+\s+i\s+want\s+[\w\s,.:;!?-]+\s+to\s+[\w\s,.:;!?-]+",
r"\bas\s+[\w\s'àáäâäèéëêìíïîòóöôùúüûñçÀÁÄÂÈÉËÊÌÍÏÎÒÓÖÔÙÚÜÛÑÇ’()\/·,.:;!?-]+\s+i\s+want\s+[\w\s'àáäâäèéëêìíïîòóöôùúüûñçÀÁÄÂÈÉËÊÌÍÏÎÒÓÖÔÙÚÜÛÑÇ’()\/·,.:;!?-]+\s+so\s+that\s+[\w\s'àáäâäèéëêìíïîòóöôùúüûñçÀÁÄÂÈÉËÊÌÍÏÎÒÓÖÔÙÚÜÛÑÇ’()\/·,.:;!?-]+",
r"\bas\s+[\w\s'àáäâäèéëêìíïîòóöôùúüûñçÀÁÄÂÈÉËÊÌÍÏÎÒÓÖÔÙÚÜÛÑÇ’()\/·,.:;!?-]+\s+i\s+want\s+[\w\s'àáäâäèéëêìíïîòóöôùúüûñçÀÁÄÂÈÉËÊÌÍÏÎÒÓÖÔÙÚÜÛÑÇ’()\/·,.:;!?-]+\s+to\s+[\w\s'àáäâäèéëêìíïîòóöôùúüûñçÀÁÄÂÈÉËÊÌÍÏÎÒÓÖÔÙÚÜÛÑÇ’()\/·,.:;!?-]+",

# Spanish - COMO...QUIERO...
r"\bcomo\s+[\w\s]+\s+quiero\s+[\w\s,.:;!?-]+\s+(?:de\s+manera\s+que|de\s+forma\s+que|para|por|porqu[eé]|porque)\s+[\w\s,.:;!?-]+",
r"\bcomo\s+[\w\s'àáäâäèéëêìíïîòóöôùúüûñçÀÁÄÂÈÉËÊÌÍÏÎÒÓÖÔÙÚÜÛÑÇ’()\/·,.:;!?-]+\s+quiero\s+[\w\s'àáäâäèéëêìíïîòóöôùúüûñçÀÁÄÂÈÉËÊÌÍÏÎÒÓÖÔÙÚÜÛÑÇ’()\/·,.:;!?-]+\s+(?:de\s+manera\s+que|de\s+forma\s+que|para|por|porqu[eé]|porque)\s+[\w\s'àáäâäèéëêìíïîòóöôùúüûñçÀÁÄÂÈÉËÊÌÍÏÎÒÓÖÔÙÚÜÛÑÇ’()\/·,.:;!?-]+",

# Catalan - COM...VULL...
r"\bcom\s+[\w\s]+\s+vull\s+[\w\s,.:;!?-]+\s+(?:de\s+manera\s+que|de\s+forma\s+que|per|perqu[eè]|perqué)\s+[\w\s,.:;!?-]+",
r"\bcom\s+(?:a\s+)?[\w\s'àáäâäèéëêìíïîòóöôùúüûñçÀÁÄÂÈÉËÊÌÍÏÎÒÓÖÔÙÚÜÛÑÇ’()\/·,.:;!?-]+\s+vull\s+[\w\s'àáäâäèéëêìíïîòóöôùúüûñçÀÁÄÂÈÉËÊÌÍÏÎÒÓÖÔÙÚÜÛÑÇ’()\/·,.:;!?-]+\s+(?:de\s+manera\s+que|de\s+forma\s+que|per\s+a\s+poder|per\s+poder|per\s+tal\s+de|per\s+tal\s+d[’']|per|perqu[eè]|perqué)\s*[\w\s'àáäâäèéëêìíïîòóöôùúüûñçÀÁÄÂÈÉËÊÌÍÏÎÒÓÖÔÙÚÜÛÑÇ’()\/·,.:;!?-]+",
]

# Compilar patrones una sola vez
Expand Down
95 changes: 50 additions & 45 deletions utils/recovery/github_recovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,17 @@ def get_organization_repos(org: str, headers: Dict[str, str]) -> List[str]:
return [repo["name"] for repo in gh_paginated(url, headers)]


def gh_paginated(url: str, headers: Dict[str, str]) -> Iterable[Dict]:
def gh_paginated(url: str, headers: Dict[str, str], params: Optional[Dict[str, str]] = None) -> Iterable[Dict]:
"""
Gets paginated results from a GitHub API endpoint. With this each call to the API returns a suitable JSON
"""
while url:
r = requests.get(url, headers=headers, timeout=30)
r = requests.get(url, headers=headers, params=params, timeout=30)
r.raise_for_status()
yield from r.json()
url = r.links.get("next", {}).get("url")
# Link-based pagination already includes query params in next URL.
params = None


def upsert(coll, docs: list[dict], key: str) -> int:
Expand Down Expand Up @@ -81,53 +83,56 @@ def collect_github(
f"Bearer {GITHUB_TOKEN}" # Authentication with GitHub API using a token
)

counters = {
"commits": 0,
"issues": 0,
"pull_requests": 0,
} # Counter to display the number of documents inserted of each event type
author_login = "backfill" # The author login is always "backfill" for backfilling

for ev in events: # Start iterating over the events to collect

if ev == "commits": # First commits
event_name = "push" # The event name for commits is always "push"

log_url = f"https://api.github.com/repos/{repo_full}/commits?per_page=100"
if since:
log_url += (
f"&since={since}" # If a SINCE date is proviaded, add it to the URL
)
if until:
log_url += (
f"&until={until}" # If a UNTIL date is proviaded, add it to the URL
)

payloads = [] # List to store the payloads of the commits
for c in gh_paginated(
log_url, headers
): # Iterate over the paginated results of the commits and store them in the payloads list under the schema
payloads.append(
{
counters = {"commits": 0, "issues": 0, "pull_requests": 0} #Counter to display the number of documents inserted of each event type
author_login = "backfill" # The author login is always "backfill" for backfilling

for ev in events: # Start iterating over the events to collect

if ev == "commits": #First commits
event_name= "push" # The event name for commits is always "push"

# Fetch all branches first, then collect commits branch-by-branch.
branches_url = f"https://api.github.com/repos/{repo_full}/branches?per_page=100"
branches = [b.get("name") for b in gh_paginated(branches_url, headers) if isinstance(b, dict) and b.get("name")]
if not branches:
branches = [None]

payloads = [] #List to store the payloads of the commits
seen_shas = set() # Avoid processing the same commit multiple times across branches
for branch in branches:
log_url = f"https://api.github.com/repos/{repo_full}/commits?per_page=100"
query_params = {}
if branch:
query_params["sha"] = branch
if since:
query_params["since"] = since
if until:
query_params["until"] = until

# Iterate over paginated commits for each branch.
for c in gh_paginated(log_url, headers, params=query_params):
sha = c.get("sha")
if not sha or sha in seen_shas:
continue
seen_shas.add(sha)

payloads.append({
"X-GitHub-Event": "push",
"repository": {"full_name": repo_full},
"organization": {"login": org},
"sender": c["author"] or {},
"commits": [
{
"id": c["sha"],
"url": c["url"],
"message": c["commit"]["message"],
"timestamp": c["commit"]["author"]["date"],
"author": {
"username": (c["author"] or {}).get("login", ""),
"name": c["commit"]["author"]["name"],
"email": c["commit"]["author"]["email"],
},
}
],
}
)
"commits": [{
"id": sha,
"url": c["url"],
"message": c["commit"]["message"],
"timestamp": c["commit"]["author"]["date"],
"author": {
"username": (c["author"] or {}).get("login", ""),
"name": c["commit"]["author"]["name"],
"email": c["commit"]["author"]["email"],
},
}],
})

coll = get_collection(f"github_{prj}.commits") # Collection name to store
for (
Expand Down
Loading
Loading