Skip to content

Commit 3077688

Browse files
NoopDog and claude committed
docs: add study entity PRD (#4376)
Add PRD for the new /index/studies entity endpoint in Azul, specifying new hit fields from DUOS, roll-up aggregation rules from child entities, and termFacet propagation to all entity endpoints. Includes example API responses verified against live DUOS/Azul data and Python scripts for coverage analysis. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 0379be5 commit 3077688

6 files changed

Lines changed: 1100 additions & 0 deletions

File tree

Lines changed: 311 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,311 @@
1+
{
2+
"hits": [
3+
{
4+
"entryId": "3e1a790a-8680-4dd2-b6f1-93ea3cba5e01",
5+
"sources": [
6+
{
7+
"source_prefix": "/0",
8+
"source_spec": "tdr:bigquery:gcp:datarepo-3ef65a17:ANVIL_CCDG_Broad_NP_Epilepsy_LEBABM_GRU_GSA_MD_20250721_ANV5_202508051741",
9+
"source_id": "7ea46791-7b08-4955-95a6-b606f7b916d6"
10+
}
11+
],
12+
"bundles": [
13+
{
14+
"bundle_uuid": "003db4c0-075b-a933-8993-697726aa77e0",
15+
"bundle_version": "2022-06-01T00:00:00.000000Z"
16+
}
17+
],
18+
"studies": [
19+
{
20+
"study_name": "Center for Common Disease Genomics [CCDG] - Neuropsychiatric: Epilepsy: Epi25 Consortium (phs001489)",
21+
"registered_identifier": "phs001489",
22+
"consortia": ["CCDG"]
23+
}
24+
],
25+
"datasets": [
26+
{
27+
"document_id": "3e1a790a-8680-4dd2-b6f1-93ea3cba5e01",
28+
"source_datarepo_row_ids": [
29+
"workspace_attributes:2fb99410-4515-4db6-90c5-e4401947d7b3"
30+
],
31+
"dataset_id": "00866376-c7ba-997e-44d3-819c1c33d67c",
32+
"consent_group": ["GRU"],
33+
"data_use_permission": ["GRU"],
34+
"owner": [null],
35+
"principal_investigator": [null],
36+
"registered_identifier": ["phs001489"],
37+
"title": "ANVIL_CCDG_Broad_NP_Epilepsy_LEBABM_GRU_GSA_MD",
38+
"data_modality": [null],
39+
"description": "[Description currently not available]",
40+
"duos_id": "DUOS-000710",
41+
"accessible": false
42+
}
43+
],
44+
"activities": [
45+
{
46+
"activity_type": ["Unknown"],
47+
"assay_type": [null],
48+
"data_modality": [null]
49+
}
50+
],
51+
"biosamples": [
52+
{
53+
"anatomical_site": [null],
54+
"biosample_type": [null],
55+
"disease": [null],
56+
"donor_age_at_collection_unit": [null],
57+
"donor_age_at_collection": [{ "gte": null, "lte": null }]
58+
}
59+
],
60+
"diagnoses": [],
61+
"donors": [],
62+
"files": [
63+
{
64+
"data_modality": [null],
65+
"file_format": [".vcf.gz"],
66+
"file_size": 117027877144,
67+
"reference_assembly": [null],
68+
"is_supplementary": [false, true],
69+
"count": 849
70+
},
71+
{
72+
"data_modality": [null],
73+
"file_format": [".idat"],
74+
"file_size": 15633649608,
75+
"reference_assembly": [null],
76+
"is_supplementary": [false],
77+
"count": 1692
78+
}
79+
]
80+
}
81+
],
82+
"termFacets": {
83+
"studies.registered_identifier": {
84+
"terms": [
85+
{ "term": "phs001489", "count": 149 },
86+
{ "term": "phs000920", "count": 20 }
87+
],
88+
"total": 382,
89+
"type": "terms"
90+
},
91+
"studies.study_name": {
92+
"terms": [
93+
{
94+
"term": "Center for Common Disease Genomics [CCDG] - Neuropsychiatric: Epilepsy: Epi25 Consortium (phs001489)",
95+
"count": 149
96+
}
97+
],
98+
"total": 382,
99+
"type": "terms"
100+
},
101+
"studies.consortia": {
102+
"terms": [
103+
{ "term": "CCDG", "count": 283 },
104+
{ "term": "CMG", "count": 20 },
105+
{ "term": "CSER", "count": 8 },
106+
{ "term": "GREGoR", "count": 7 }
107+
],
108+
"total": 382,
109+
"type": "terms"
110+
},
111+
"studies.study_design": {
112+
"terms": [
113+
{ "term": "Case-Control", "count": "..." },
114+
{ "term": "Cohort", "count": "..." }
115+
],
116+
"total": 382,
117+
"type": "terms"
118+
},
119+
"studies.phenotype_indication": {
120+
"terms": [
121+
{ "term": "Epilepsy", "count": "..." },
122+
{ "term": "Inflammatory Bowel Disease", "count": "..." }
123+
],
124+
"total": 382,
125+
"type": "terms"
126+
},
127+
"studies.data_types": {
128+
"terms": [
129+
{ "term": "WES", "count": "..." },
130+
{ "term": "WGS", "count": "..." },
131+
{ "term": "GSA-MD", "count": "..." }
132+
],
133+
"total": 382,
134+
"type": "terms"
135+
},
136+
"studies.principal_investigator": {
137+
"terms": [
138+
{ "term": "Ben Neale", "count": 149 },
139+
{ "term": "Daniel MacArthur", "count": "..." }
140+
],
141+
"total": 382,
142+
"type": "terms"
143+
},
144+
"datasets.consent_group": {
145+
"terms": [
146+
{ "term": "GRU", "count": 107 },
147+
{ "term": "HMB", "count": 27 },
148+
{ "term": "HMB-MDS", "count": 27 },
149+
{ "term": "HMB-NPU-MDS", "count": 18 }
150+
],
151+
"total": 382,
152+
"type": "terms"
153+
},
154+
"datasets.data_use_permission": {
155+
"terms": [
156+
{ "term": "GRU", "count": 107 },
157+
{ "term": "HMB", "count": 27 },
158+
{ "term": "HMB-MDS", "count": 27 }
159+
],
160+
"total": 382,
161+
"type": "terms"
162+
},
163+
"datasets.title": {
164+
"terms": [
165+
{
166+
"term": "ANVIL_CCDG_Broad_NP_Epilepsy_LEBABM_GRU_GSA_MD",
167+
"count": 1
168+
}
169+
],
170+
"total": 382,
171+
"type": "terms"
172+
},
173+
"biosamples.disease": {
174+
"terms": [{ "term": null, "count": 382 }],
175+
"total": 382,
176+
"type": "terms"
177+
},
178+
"biosamples.anatomical_site": {
179+
"terms": [
180+
{ "term": null, "count": 262 },
181+
{ "term": "Unknown", "count": 137 }
182+
],
183+
"total": 382,
184+
"type": "terms"
185+
},
186+
"biosamples.biosample_type": {
187+
"terms": [
188+
{ "term": null, "count": 259 },
189+
{ "term": "Blood", "count": 125 }
190+
],
191+
"total": 382,
192+
"type": "terms"
193+
},
194+
"diagnoses.disease": {
195+
"terms": [
196+
{ "term": "Autism spectrum disorder", "count": 37 },
197+
{ "term": "Inflammatory bowel disease", "count": 15 }
198+
],
199+
"total": 382,
200+
"type": "terms"
201+
},
202+
"diagnoses.phenotype": {
203+
"terms": [
204+
{ "term": null, "count": 370 },
205+
{ "term": "Agenesis of the Corpus Callosum", "count": 2 }
206+
],
207+
"total": 382,
208+
"type": "terms"
209+
},
210+
"diagnoses.phenopacket": {
211+
"terms": [{ "term": null, "count": 382 }],
212+
"total": 382,
213+
"type": "terms"
214+
},
215+
"files.data_modality": {
216+
"terms": [
217+
{ "term": null, "count": 377 },
218+
{ "term": "single-cell RNA sequencing assay", "count": 4 }
219+
],
220+
"total": 382,
221+
"type": "terms"
222+
},
223+
"files.file_format": {
224+
"terms": [
225+
{ "term": ".vcf.gz", "count": 288 },
226+
{ "term": ".tbi", "count": 277 },
227+
{ "term": ".cram", "count": 233 }
228+
],
229+
"total": 382,
230+
"type": "terms"
231+
},
232+
"files.is_supplementary": {
233+
"terms": [
234+
{ "term": "true", "count": 347 },
235+
{ "term": "false", "count": 344 }
236+
],
237+
"total": 382,
238+
"type": "terms"
239+
},
240+
"activities.activity_type": {
241+
"terms": [
242+
{ "term": "Indexing", "count": 283 },
243+
{ "term": "Unknown", "count": 210 },
244+
{ "term": "Sequencing", "count": 130 }
245+
],
246+
"total": 382,
247+
"type": "terms"
248+
},
249+
"activities.assay_type": {
250+
"terms": [
251+
{ "term": null, "count": 382 },
252+
{ "term": "WGS", "count": 1 }
253+
],
254+
"total": 382,
255+
"type": "terms"
256+
},
257+
"activities.data_modality": {
258+
"terms": [
259+
{ "term": null, "count": 382 },
260+
{ "term": "Genomic", "count": 7 }
261+
],
262+
"total": 382,
263+
"type": "terms"
264+
},
265+
"donors.organism_type": {
266+
"terms": [
267+
{ "term": "Human", "count": 199 },
268+
{ "term": null, "count": 180 }
269+
],
270+
"total": 382,
271+
"type": "terms"
272+
},
273+
"donors.phenotypic_sex": {
274+
"terms": [
275+
{ "term": "Female", "count": 190 },
276+
{ "term": "Male", "count": 190 }
277+
],
278+
"total": 382,
279+
"type": "terms"
280+
},
281+
"donors.reported_ethnicity": {
282+
"terms": [
283+
{ "term": null, "count": 334 },
284+
{ "term": "Black or African American", "count": 54 }
285+
],
286+
"total": 382,
287+
"type": "terms"
288+
},
289+
"datasets.registered_identifier": {
290+
"terms": [
291+
{ "term": "phs001489", "count": 149 },
292+
{ "term": "phs000920", "count": 20 }
293+
],
294+
"total": 382,
295+
"type": "terms"
296+
},
297+
"files.reference_assembly": {
298+
"terms": [{ "term": "...", "count": "..." }],
299+
"total": 382,
300+
"type": "terms"
301+
},
302+
"accessible": {
303+
"terms": [
304+
{ "term": "true", "count": "..." },
305+
{ "term": "false", "count": "..." }
306+
],
307+
"total": 382,
308+
"type": "terms"
309+
}
310+
}
311+
}
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
{
2+
"datasetId": 727,
3+
"datasetIdentifier": "DUOS-000667",
4+
"datasetName": "ANVIL_CCDG_Broad_NP_Epilepsy_AUSALF_HMB_IRB_GSA_MD (HMB-IRB-MDS)",
5+
"name": "ANVIL_CCDG_Broad_NP_Epilepsy_AUSALF_HMB_IRB_GSA_MD (HMB-IRB-MDS)",
6+
"alias": 667,
7+
"dacApproval": true,
8+
"dacId": 2,
9+
"studyId": 111,
10+
"dataUse": "...",
11+
"properties": "...",
12+
"study": {
13+
"studyId": 111,
14+
"name": "Center for Common Disease Genomics [CCDG] - Neuropsychiatric: Epilepsy: Epi25 Consortium (phs001489)",
15+
"description": "Epilepsy genetics research is at an exciting stage where it is now feasible, with the power of a large cohort, to understand the more complex genetic components of epilepsy...",
16+
"piName": "Ben Neale",
17+
"dataTypes": ["GSA-MD", "WES"],
18+
"datasetIds": [768, 512, 769, "...149 total"],
19+
"properties": [
20+
{
21+
"key": "collaboratingSites",
22+
"type": "Json",
23+
"value": ["CCDG"]
24+
},
25+
{
26+
"key": "dbGaPPhsID",
27+
"type": "String",
28+
"value": "phs001489"
29+
},
30+
{
31+
"key": "phenotypeIndication",
32+
"type": "String",
33+
"value": "Epilepsy"
34+
},
35+
{
36+
"key": "data",
37+
"type": "Json",
38+
"value": {
39+
"tags": [
40+
"Platform: AnVIL",
41+
"Platform: NCPI",
42+
"dbGaP_phs_id: phs001489",
43+
"dbGaP_accession: phs001489.v4.p2",
44+
"dbGaP_study_design: Case-Control"
45+
]
46+
}
47+
}
48+
]
49+
}
50+
}

0 commit comments

Comments
 (0)