This repository was archived by the owner on Feb 1, 2026. It is now read-only.
forked from security-alliance/frameworks
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
97 lines (80 loc) · 3.66 KB
/
main.py
File metadata and controls
97 lines (80 loc) · 3.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import csv
import yaml
import re
from collections import OrderedDict
from openai import OpenAI
import time
# Shared OpenAI client, created once at import time and used by generate_titles_batch.
# NOTE(review): no arguments are passed, so credentials presumably come from the
# environment — confirm before running in a new setup.
client = OpenAI()
# ---- Config ----
MODEL = "gpt-5-nano" # model name sent with every chat completion request
BATCH_SIZE = 15 # how many descriptions are sent per request
SLEEP_BETWEEN_BATCHES = 0.5 # seconds to pause between batches, to avoid rate limits
# -----------------
def generate_titles_batch(descriptions):
    """Generate short titles for a batch of control descriptions.

    All descriptions are sent in a single numbered prompt, and the numbered
    reply is mapped back onto the input order by item number, so a skipped
    item or a stray preamble line cannot shift titles onto the wrong
    description.

    Args:
        descriptions: Iterable of control description strings.

    Returns:
        A list of title strings with exactly as many entries as there are
        descriptions; entry ``i`` belongs to description ``i``. An entry is
        ``""`` when the reply contained no usable title for that item.
    """
    descriptions = list(descriptions)
    if not descriptions:
        # Nothing to title; skip the API round trip entirely.
        return []

    joined = "\n".join(
        f"{number}. {text}" for number, text in enumerate(descriptions, start=1)
    )
    prompt = (
        "For each numbered control description below, write a concise, professional title "
        "(4-6 words max) summarizing it. Do not modify or paraphrase the description. "
        "Return only a numbered list of titles.\n\n"
        f"{joined}"
    )
    response = client.chat.completions.create(
        model=MODEL,
        messages=[{"role": "user", "content": prompt}],
    )
    # The SDK types ``content`` as optional, so normalise None before parsing.
    reply = response.choices[0].message.content or ""
    return _parse_numbered_titles(reply, len(descriptions))


def _parse_numbered_titles(text, expected):
    """Map a numbered-list reply onto ``expected`` title slots ("" if missing)."""
    titles = [""] * expected
    unnumbered = []
    saw_numbered_line = False

    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        # Require a separator after the number so a title that merely starts
        # with digits (e.g. "2FA Enforcement") is not mistaken for item 2.
        match = re.match(r"(\d+)\s*[.):\-]+\s*(.*)", line)
        if match is None:
            unnumbered.append(line)
            continue
        saw_numbered_line = True
        slot = int(match.group(1)) - 1
        # Ignore out-of-range numbers and keep the first title seen per slot.
        if 0 <= slot < expected and not titles[slot]:
            titles[slot] = match.group(2).strip()

    if not saw_numbered_line:
        # The reply ignored the numbering request; fall back to line order.
        for slot, line in enumerate(unnumbered[:expected]):
            titles[slot] = line

    return titles
def csv_to_yaml(input_path, output_path, add_titles=False):
    """Convert a sectioned controls CSV into a YAML file.

    The CSV is read with ``csv.DictReader`` using the columns ``Section``,
    ``Control ID`` and ``Question``:

    * A row whose ``Section`` cell contains ``" - "`` and whose other two
      cells are empty starts a new section; the text after the first
      ``" - "`` becomes the section title and the section id is
      ``section-<n>`` in order of appearance.
    * A row with both a control ID and a question becomes a control of the
      most recently started section.
    * Rows with an empty ``Section`` cell, and control rows that appear
      before any section header, are skipped.

    The output has the shape ``{"cert": [{"id", "title", "controls"}, ...]}``.

    Args:
        input_path: Path of the CSV file to read (UTF-8).
        output_path: Path of the YAML file to write (UTF-8).
        add_titles: When true, call ``generate_titles_batch`` on the control
            descriptions in batches of ``BATCH_SIZE`` and store each
            non-empty result under the control's ``title`` key.
    """
    sections = OrderedDict()
    current_section = None
    all_controls = []  # flat list in file order, used for batching

    with open(input_path, newline="", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            section_raw = (row.get("Section") or "").strip()
            control_id = (row.get("Control ID") or "").strip()
            # Collapse every whitespace run, including embedded newlines, to
            # one space so words on adjacent lines are not fused together.
            question = re.sub(r"\s+", " ", (row.get("Question") or "").strip())

            if not section_raw:
                continue

            # Detect new section header like "Section 1 - Team Structure, Roles & Responsibilities"
            if " - " in section_raw and not control_id and not question:
                section_title = section_raw.partition(" - ")[2].strip()
                current_section = f"section-{len(sections) + 1}"
                sections[current_section] = {"title": section_title, "controls": []}
                continue

            if current_section is None:
                continue

            if control_id and question:
                control = {"id": control_id, "description": question}
                sections[current_section]["controls"].append(control)
                all_controls.append(control)

    # Add titles in batches
    if add_titles:
        print(f"🧠 Generating titles in batches of {BATCH_SIZE}...")
        total = len(all_controls)
        for start in range(0, total, BATCH_SIZE):
            batch = all_controls[start:start + BATCH_SIZE]
            titles = generate_titles_batch([c["description"] for c in batch])

            assigned = 0
            for control, title in zip(batch, titles):
                # An empty title carries no information; leave the key absent.
                if title:
                    control["title"] = title
                    assigned += 1

            if assigned != len(batch) or len(titles) != len(batch):
                # zip() would otherwise hide a count mismatch silently.
                print(
                    f"⚠️ Controls {start+1}-{start+len(batch)}: received {len(titles)} "
                    f"titles for {len(batch)} controls; {len(batch) - assigned} left untitled."
                )
            print(f" Added titles for {start+1}-{start+len(batch)}")

            # Pause only when another request follows.
            if start + BATCH_SIZE < total:
                time.sleep(SLEEP_BETWEEN_BATCHES)

    cert = [
        {"id": sid, "title": data["title"], "controls": data["controls"]}
        for sid, data in sections.items()
    ]
    yaml_data = {"cert": cert}

    with open(output_path, "w", encoding="utf-8") as f:
        yaml.dump(yaml_data, f, sort_keys=False, allow_unicode=True, width=90)

    print(f"✅ Converted {input_path} → {output_path}")
    if add_titles:
        print("✅ Titles added via batched OpenAI calls.")
if __name__ == "__main__":
    # Script entry point: convert the default input file and generate titles.
    csv_to_yaml(
        input_path="input.csv",
        output_path="output_with_titles.yaml",
        add_titles=True,
    )