Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/pyosmeta/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,11 @@ class ReviewModel(BaseModel):
gh_meta: Optional[GhMeta] = None
labels: list[str] = Field(default_factory=list)
active: bool = True # To indicate if package is maintained or archived
# Generative AI disclosure (from "Development Best Practices & GenerativeAI
# Use Disclosure" section; None when section is absent)
genai_used: Optional[bool] = None
genai_tools: Optional[str] = None
genai_scope: Optional[str] = None

@model_validator(mode="after")
def set_repository_host_from_link(self):
Expand Down
73 changes: 73 additions & 0 deletions src/pyosmeta/parse_issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,8 @@ def _postprocess_meta(self, meta: dict, body: List[str]) -> dict:
meta["partners"] = self.get_categories(
body, "## Community Partnerships", 3, keyed=True
)
genai = self.get_genai_disclosure(body)
meta.update(genai)
if "joss_doi" in meta:
# Normalize the JOSS archive field. Some issues use `JOSS DOI` others `JOSS`
meta["joss"] = meta.pop("joss_doi")
Expand Down Expand Up @@ -394,6 +396,77 @@ def get_repo_paths(
all_repos[a_package] = {"owner": owner, "repo_name": repo}
return all_repos

def get_genai_disclosure(self, issue_list: list[str]) -> dict[str, Any]:
"""Parse the Development Best Practices & GenerativeAI Use Disclosure
section from the issue body.

Extracts whether GenAI was used (checkbox), the listed tools/frameworks,
and the description of nature/scope of support. Returns all None if the
section is absent (e.g. older submissions).

Parameters
----------
issue_list : list[str]
The issue body split into lines (after the first ---).

Returns
-------
dict
Keys: genai_used (bool or None), genai_tools (str or None),
genai_scope (str or None).
"""
result = {
"genai_used": None,
"genai_tools": None,
"genai_scope": None,
}
section_index = None
for i, line in enumerate(issue_list):
if "## " in line and (
"Development Best Practices" in line or "GenerativeAI" in line
):
section_index = i
break
if section_index is None:
return result

# Find the "Generative AI tools were used" checkbox in this section
genai_checkbox = "Generative AI tools were used"
for i in range(section_index + 1, len(issue_list)):
line = issue_list[i]
if line.strip().startswith("## "):
break
if genai_checkbox in line:
result["genai_used"] = bool(re.search(r"-\s*\[[xX]\]", line))
break

def _collect_after_subheading(needle: str) -> str | None:
idx = None
for i, line in enumerate(issue_list):
if "### " in line and needle in line:
idx = i
break
if idx is None:
return None
lines = []
for i in range(idx + 1, len(issue_list)):
line = issue_list[i]
if line.strip().startswith("### ") or line.strip().startswith(
"## "
):
break
lines.append(line)
text = "\n".join(lines).strip()
return text if text else None

result["genai_tools"] = _collect_after_subheading(
"Please list the tools"
)
result["genai_scope"] = _collect_after_subheading(
"Describe the nature and scope"
)
return result

def get_categories(
self,
issue_list: list[str],
Expand Down
6 changes: 2 additions & 4 deletions src/pyosmeta/utils_clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,8 @@ def clean_archive(archive):
if not archive:
# If field is empty, return None
return None
if archive.lower() in ("n/a", "tbd"):
return None
if archive.startswith("[") and archive.endswith(")"):
# Extract the outermost link
link = archive[archive.rfind("](") + 2 : -1]
Expand All @@ -237,10 +239,6 @@ def clean_archive(archive):
logger.warning(f"Invalid archive URL (not resolving): {archive}")
# raise ValueError(f"Invalid archive URL (not resolving): {archive}")
return archive
elif archive.lower() == "n/a":
return None
elif archive.lower() == "tbd":
return None
else:
raise ValueError(f"Invalid archive URL: {archive}")

Expand Down
68 changes: 68 additions & 0 deletions tests/data/reviews/submission_with_genai_section.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
Submitting Author: Author Name (@username)
All current maintainers: (@username, @username2)
Package Name: genai_test_package
One-Line Description of Package: A package that used GenAI during development
Repository Link: https://github.com/username/genai_test_package
Version submitted: v.0.1.0
Editor: @editoruser
Reviewer 1: @reviewer1
Reviewer 2: @reviewer2
Archive: TBD
JOSS DOI: TBD
Version accepted: v.0.1.0
Date accepted (month/day/year): 04/21/2024

---

## Code of Conduct & Commitment to Maintain Package

- [x] I agree to abide by [pyOpenSci's Code of Conduct][PyOpenSciCodeOfConduct] during the review process and in maintaining my package after should it be accepted.
- [x] I have read and will commit to package maintenance after the review as per the [pyOpenSci Policies Guidelines][Commitment].

## Development Best Practices & GenerativeAI Use Disclosure

- [x] This package has a public development history spanning 3-6 months, with commits distributed over time that reflect **iterative, thoughtful development.**
- [x] All code in this package has been **carefully reviewed by a human**. Its implementation is also understood by the authors submitting the package.
- [x] All communication on this issue will be written by a human (someone on your maintainer team). We embrace the use of LLMs for translation and grammar correction. We prefer honest interactions over ones that prioritize perfect language and grammar. As little aid from a LLM as possible.
- [x] **Generative AI tools were used to develop and maintain this package.**

### Please list the tools and frameworks that you used below (Examples include Claude Code, Cursor, OpenClaw, ChatGPT, VSCode + Copilot)

Cursor, GitHub Copilot for autocomplete.

### Describe the nature and scope of support that LLMs provided. Examples include code generation, autocomplete, documentation development, refactoring, test development

Used for autocomplete and documentation drafting. No code generation.

## Description

Description of package that used GenAI during development.

## Scope

- Please indicate which category or categories.
Check out our [package scope page][PackageCategories] to learn more about our
scope. (If you are unsure of which category you fit, we suggest you make a pre-submission inquiry):

- [ ] Data retrieval
- [ ] Data extraction
- [x] Data processing/munging
- [ ] Data deposition
- [ ] Data validation and testing
- [ ] Data visualization[^1]
- [ ] Workflow automation
- [ ] Citation management and bibliometrics
- [ ] Scientific software wrappers
- [ ] Database interoperability

## Domain Specific

- [ ] Geospatial
- [ ] Education

## Community Partnerships
If your package is associated with an
existing community please check below:

- [x] Astropy: Link coming soon to standards
- [ ] Pangeo: My package adheres to the [Pangeo standards listed in the pyOpenSci peer review guidebook][PangeoCollaboration]
31 changes: 31 additions & 0 deletions tests/integration/test_parse_issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,3 +161,34 @@ def test_repository_host_gitlab(process_issues, data_file):
review = data_file("reviews/gitlab_submission.txt", True)
review = process_issues.parse_issue(review)
assert review.repository_host == "gitlab"


def test_parse_submission_with_genai_section(process_issues, data_file):
    """
    Integration test: parse a full submission template that includes the
    Development Best Practices & GenerativeAI Use Disclosure section.

    Verifies that genai_used, genai_tools, and genai_scope are populated
    from the real fixture, and that the sections following the disclosure
    (Scope categories, Community Partnerships) still parse as before.
    """
    issue_body = data_file("reviews/submission_with_genai_section.txt", True)
    parsed = process_issues.parse_issue(issue_body)

    assert parsed.package_name == "genai_test_package"

    # GenAI disclosure fields extracted from the new template section
    assert parsed.genai_used is True
    assert parsed.genai_tools is not None
    assert "Cursor" in parsed.genai_tools
    assert "Copilot" in parsed.genai_tools
    assert parsed.genai_scope is not None
    assert "autocomplete" in parsed.genai_scope
    assert "documentation" in parsed.genai_scope

    # Sections after the GenAI block must still parse correctly
    assert parsed.categories is not None
    assert "data-processing-munging" in parsed.categories
    assert parsed.partners is not None
    partner_names = []
    for partner in parsed.partners or []:
        partner_names.append(
            partner.value if hasattr(partner, "value") else partner
        )
    assert "astropy" in partner_names